From 43dd073838733d62d8e2754077db9c1e1328e1b9 Mon Sep 17 00:00:00 2001 From: Jonas Kunz Date: Tue, 8 Jul 2025 10:51:12 +0200 Subject: [PATCH 01/32] Initial experiments commit --- libs/exponential-histogram/build.gradle | 17 ++ .../exponentialhistogram/DownscaleStats.java | 61 ++++++ .../ExpHistoPercentiles.java | 58 ++++++ .../ExponentialHistogram.java | 37 ++++ .../ExponentialHistogramBuilder.java | 15 ++ .../ExponentialHistogramMerger.java | 93 +++++++++ .../ExponentialHistogramUtils.java | 164 +++++++++++++++ .../FixedSizeExponentialHistogram.java | 195 ++++++++++++++++++ .../MergingBucketIterator.java | 91 ++++++++ .../ScaleAdjustingBucketIterator.java | 76 +++++++ .../exponentialhistogram/ZeroBucket.java | 98 +++++++++ .../DownscaleStatsTest.java | 77 +++++++ .../ExponentialHistogramUtilsTest.java | 116 +++++++++++ .../FixedSizeExponentialHistogramTest.java | 71 +++++++ 14 files changed, 1169 insertions(+) create mode 100644 libs/exponential-histogram/build.gradle create mode 100644 libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/DownscaleStats.java create mode 100644 libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ExpHistoPercentiles.java create mode 100644 libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ExponentialHistogram.java create mode 100644 libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ExponentialHistogramBuilder.java create mode 100644 libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ExponentialHistogramMerger.java create mode 100644 libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ExponentialHistogramUtils.java create mode 100644 libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/FixedSizeExponentialHistogram.java create mode 100644 
libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/MergingBucketIterator.java create mode 100644 libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ScaleAdjustingBucketIterator.java create mode 100644 libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ZeroBucket.java create mode 100644 libs/exponential-histogram/src/test/java/org/elasticsearch/exponentialhistogram/DownscaleStatsTest.java create mode 100644 libs/exponential-histogram/src/test/java/org/elasticsearch/exponentialhistogram/ExponentialHistogramUtilsTest.java create mode 100644 libs/exponential-histogram/src/test/java/org/elasticsearch/exponentialhistogram/FixedSizeExponentialHistogramTest.java diff --git a/libs/exponential-histogram/build.gradle b/libs/exponential-histogram/build.gradle new file mode 100644 index 0000000000000..50bf5d90a7da0 --- /dev/null +++ b/libs/exponential-histogram/build.gradle @@ -0,0 +1,17 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the "Elastic License + * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side + * Public License v 1"; you may not use this file except in compliance with, at + * your election, the "Elastic License 2.0", the "GNU Affero General Public + * License v3.0 only", or the "Server Side Public License, v 1". + */ + +// TODO: publish this when ready? 
+//apply plugin: 'elasticsearch.publish' + +dependencies { + compileOnly project(':libs:core') //just for @Nullable annotations + + testImplementation(project(":test:framework")) +} diff --git a/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/DownscaleStats.java b/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/DownscaleStats.java new file mode 100644 index 0000000000000..bb2de85526506 --- /dev/null +++ b/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/DownscaleStats.java @@ -0,0 +1,61 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +package org.elasticsearch.exponentialhistogram; + +public class DownscaleStats { + + // collapsedCount[i] represents the number of assitional + // collapsed buckets when increasing the scale by (i+1) instead of (i) + int[] collapsedCount = new int[63]; + + + void add(long previousBucketIndex, long currentBucketIndex) { + if (currentBucketIndex <= previousBucketIndex) { + throw new IllegalArgumentException("currentBucketIndex must be bigger than previousBucketIndex"); + } + /* Below is an efficient variant of the following algorithm: + for (int i=0; i<64; i++) { + if (prevIndex>>(i+1) == currIndex>>(i+1)) { + collapsedBucketCount[i]++; + break; + } + } + So we find the smallest scale reduction required to make the two buckets collapse into one + */ + long bitXor = previousBucketIndex ^ currentBucketIndex; + int numEqualLeadingBits = Long.numberOfLeadingZeros(bitXor); + if (numEqualLeadingBits == 0) { + // right-shifting will never make the buckets combine, because one is positive and the other negative + return; + } + int requiredScaleChange = 64 - numEqualLeadingBits; + collapsedCount[requiredScaleChange-1]++; + } + + int 
getCollapsedBucketCountAfterScaleReduction(int reduction) { + int totalCollapsed = 0; + for (int i = 0; i < reduction; i++) { + totalCollapsed += collapsedCount[i]; + } + return totalCollapsed; + } + + public int getRequiredScaleReductionToReduceBucketCountBy(int desiredReduction) { + if (desiredReduction == 0) { + return 0; + } + int totalCollapsed = 0; + for (int i = 0; i < collapsedCount.length; i++) { + totalCollapsed += collapsedCount[i]; + if (totalCollapsed >= desiredReduction) { + return i+1; + } + } + throw new IllegalArgumentException("it is not possible to reduce the bucket count by " + desiredReduction); + } +} diff --git a/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ExpHistoPercentiles.java b/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ExpHistoPercentiles.java new file mode 100644 index 0000000000000..7a9855ebea51c --- /dev/null +++ b/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ExpHistoPercentiles.java @@ -0,0 +1,58 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +package org.elasticsearch.exponentialhistogram; + +public class ExpHistoPercentiles { + + + public static double getPercentile(ExponentialHistogram histo, double percentile) { + if (percentile < 0 || percentile > 1) { + throw new IllegalArgumentException("percentile must be in range [0, 1]"); + } + + long zeroCount = histo.zeroBucket().count(); + long negCount = getTotalCount(histo.negativeBuckets()); + long posCount = getTotalCount(histo.positiveBuckets()); + + long totalCount = zeroCount + negCount + posCount; + if (totalCount == 0) { + // Can't compute percentile on empty histogram + return Double.NaN; + } + // TODO: Maybe not round, but interpolate between? 
+ long targetRank = Math.round((totalCount - 1) * percentile); + if (targetRank < negCount) { + return -getBucketMidpointForRank(histo.negativeBuckets(), (negCount - 1) - targetRank); + } else if (targetRank < (negCount + zeroCount)) { + return 0.0; // we are in the zero bucket + } else { + return getBucketMidpointForRank(histo.positiveBuckets(), targetRank - (negCount + zeroCount)); + } + } + + private static double getBucketMidpointForRank(ExponentialHistogram.BucketIterator buckets, long rank) { + long seenCount = 0; + while (buckets.hasNext()) { + seenCount+= buckets.peekCount(); + if (rank < seenCount) { + return ExponentialHistogramUtils.getPointOfLeastRelativeError(buckets.peekIndex(), buckets.scale()); + } + buckets.advance(); + } + throw new IllegalStateException("buckets contain in total less elements than the desired rank"); + } + + private static long getTotalCount(ExponentialHistogram.BucketIterator buckets) { + long count = 0; + while (buckets.hasNext()) { + count += buckets.peekCount(); + buckets.advance(); + } + return count; + } +} diff --git a/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ExponentialHistogram.java b/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ExponentialHistogram.java new file mode 100644 index 0000000000000..20fffac06b173 --- /dev/null +++ b/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ExponentialHistogram.java @@ -0,0 +1,37 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. 
+ */ + +package org.elasticsearch.exponentialhistogram; + +public interface ExponentialHistogram { + + int scale(); + + ZeroBucket zeroBucket(); + + BucketIterator positiveBuckets(); + BucketIterator negativeBuckets(); + + /** + * Returns the highest populated bucket index, taking both negative and positive buckets into account; + * If there are no buckets populated, Long.MIN_VALUE shall be returned. + */ + long maximumBucketIndex(); + + /** + * Iterator over the non-empty buckets. + */ + interface BucketIterator { + boolean hasNext(); + long peekCount(); + long peekIndex(); + void advance(); + int scale(); + BucketIterator copy(); + } + +} diff --git a/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ExponentialHistogramBuilder.java b/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ExponentialHistogramBuilder.java new file mode 100644 index 0000000000000..444ee984decad --- /dev/null +++ b/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ExponentialHistogramBuilder.java @@ -0,0 +1,15 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. 
+ */ + +package org.elasticsearch.exponentialhistogram; + +public interface ExponentialHistogramBuilder { + + void setZeroBucket(ZeroBucket zeroBucket); + boolean tryAddBucket(long index, long count, boolean isPositive); + void resetBuckets(int newScale); +} diff --git a/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ExponentialHistogramMerger.java b/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ExponentialHistogramMerger.java new file mode 100644 index 0000000000000..51ca037bdc742 --- /dev/null +++ b/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ExponentialHistogramMerger.java @@ -0,0 +1,93 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +package org.elasticsearch.exponentialhistogram; + +public class ExponentialHistogramMerger { + + + // TODO: make this more efficient in case b is much smaller than a + public static void merge(ExponentialHistogramBuilder output, ExponentialHistogram a, ExponentialHistogram b) { + //first step: generate new zero threshold + ExponentialHistogram.BucketIterator posBucketsA = a.positiveBuckets(); + ExponentialHistogram.BucketIterator negBucketsA = a.negativeBuckets(); + ExponentialHistogram.BucketIterator posBucketsB = b.positiveBuckets(); + ExponentialHistogram.BucketIterator negBucketsB = b.negativeBuckets(); + + ZeroBucket zeroBucket = a.zeroBucket().merge(b.zeroBucket()); + zeroBucket = zeroBucket.collapseOverlappingBuckets(posBucketsA, negBucketsA, posBucketsB, negBucketsB); + + output.setZeroBucket(zeroBucket); + + // we will attempt to bring everything to the scale of A + // this might involve increasing the scale for B, which in turn would increase the indices + // we need to make sure to not exceed the numeric 
limits (64 bit) for those in this case + int targetScale = a.scale(); + if (targetScale > b.scale()) { + long highestIndex = b.maximumBucketIndex(); + if (highestIndex > Long.MIN_VALUE) { + targetScale = Math.min(targetScale, b.scale() + getMaximumScaleIncrease(highestIndex)); + } + if (posBucketsB.hasNext()) { + targetScale = Math.min(targetScale, b.scale() + getMaximumScaleIncrease(posBucketsB.peekIndex())); + } + if (negBucketsB.hasNext()) { + targetScale = Math.min(targetScale, b.scale() + getMaximumScaleIncrease(negBucketsB.peekIndex())); + } + } + + // Now we are sure that everything fits numerically into targetScale + // however, we might exceed our limit for the total number of buckets + // therefore we try the merging optimistically, and if we fail we reduce the target scale accordingly to make everything fit + + MergingBucketIterator positiveMerged = new MergingBucketIterator(posBucketsA.copy(), posBucketsB.copy(), targetScale); + MergingBucketIterator negativeMerged = new MergingBucketIterator(negBucketsA.copy(), negBucketsB.copy(), targetScale); + + output.resetBuckets(targetScale); + DownscaleStats downscaleStats = new DownscaleStats(); + int overflowCount = putBuckets(output, negativeMerged, false, downscaleStats); + overflowCount += putBuckets(output, positiveMerged, true, downscaleStats); + + if (overflowCount > 0) { + // UDD-sketch approach: we decrease the scale and retry + int reduction = downscaleStats.getRequiredScaleReductionToReduceBucketCountBy(overflowCount); + targetScale -= reduction; + output.resetBuckets(targetScale); + positiveMerged = new MergingBucketIterator(posBucketsA, posBucketsB, targetScale); + negativeMerged = new MergingBucketIterator(negBucketsA, negBucketsB, targetScale); + overflowCount = putBuckets(output, negativeMerged, false, null); + overflowCount += putBuckets(output, positiveMerged, true, null); + + if (overflowCount > 0) { + throw new IllegalStateException("Should never happen, the histogram should have had enough 
space"); + } + } + } + + private static int putBuckets(ExponentialHistogramBuilder output, ExponentialHistogram.BucketIterator buckets, boolean isPositive, @Nullable DownscaleStats downscaleStats) { + boolean collectDownScaleStatsOnNext = false; + long prevIndex = 0; + int overflowCount = 0; + while (buckets.hasNext()) { + long idx = buckets.peekIndex(); + if (collectDownScaleStatsOnNext) { + downscaleStats.add(prevIndex, idx); + } else { + collectDownScaleStatsOnNext = downscaleStats != null; + } + + if (output.tryAddBucket(idx, buckets.peekCount(), isPositive) == false) { + overflowCount++; + } + + prevIndex = idx; + buckets.advance(); + } + return overflowCount; + } + +} diff --git a/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ExponentialHistogramUtils.java b/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ExponentialHistogramUtils.java new file mode 100644 index 0000000000000..3477ae18877a4 --- /dev/null +++ b/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ExponentialHistogramUtils.java @@ -0,0 +1,164 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +package org.elasticsearch.exponentialhistogram; + +public class ExponentialHistogramUtils { + + /** Bit mask used to isolate exponent of IEEE 754 double precision number. */ + private static final long EXPONENT_BIT_MASK = 0x7FF0000000000000L; + + /** Bit mask used to isolate the significand of IEEE 754 double precision number. */ + private static final long SIGNIFICAND_BIT_MASK = 0xFFFFFFFFFFFFFL; + + /** Bias used in representing the exponent of IEEE 754 double precision number. 
*/ + private static final int EXPONENT_BIAS = 1023; + + /** + * The number of bits used to represent the significand of IEEE 754 double precision number, + * excluding the implicit bit. + */ + private static final int SIGNIFICAND_WIDTH = 52; + + /** The number of bits used to represent the exponent of IEEE 754 double precision number. */ + private static final int EXPONENT_WIDTH = 11; + + private static final double LOG_BASE2_E = 1D / Math.log(2); + + + // Magic number, computed via log(4/3)/log(2^(2^-64)), but exact + private static final long SCALE_UP_64_OFFSET = 7656090530189244512L; + + static long adjustScale(long index, int scaleAdjustment) { + if (scaleAdjustment <= 0) { + return index >> -scaleAdjustment; + } else { + // ((index << 64) + SCALE_UP_64_OFFSET)) >> (64-scaleAdjustment) + // = index << scaleAdjustment + SCALE_UP_64_OFFSET >> (64-scaleAdjustment) + return (index << scaleAdjustment) + (SCALE_UP_64_OFFSET >> (64-scaleAdjustment)); + } + } + + /** + * Equivalent to mathematically correct comparison of the lower bucket boundaries of the given buckets + */ + public static int compareLowerBoundaries(long idxA, int scaleA, long idxB, int scaleB) { + if (scaleA > scaleB) { + return -compareLowerBoundaries(idxB, scaleB, idxA, scaleA); + } + // scaleA <= scaleB + int shifts = scaleB - scaleA; + int maxScaleAdjustment = getMaximumScaleIncrease(idxA); + if (maxScaleAdjustment < shifts) { + // we would overflow if we adjust A to the scale of B + // so if A is negative, scaling would produce a number less than Long.MIN_VALUE, therefore it is definitely smaller than B + // if A is positive, scaling would produce a number bigger than Long.MAX_VALUE, therefore it is definitely bigger than B + // if A is zero => shifting and therefore scale adjustment would not have any effect + if (idxA == 0) { + return Long.compare(0, idxB); + } else { + return idxA < 0 ? 
-1 : +1; + } + } else { + long adjustedIdxA = idxA << shifts; + return Long.compare(adjustedIdxA, idxB); + } + } + + /** + * Returns the maximum permissible scale-increase which does not cause an overflow + * of the index. + */ + public static int getMaximumScaleIncrease(long index) { + if (index < 0) { + index = ~index; + } + return Long.numberOfLeadingZeros(index) - 1; + } + + public static double getUpperBucketBoundary(long index, int scale) { + long nextIndex = index; + if (index < Long.MAX_VALUE) { + nextIndex++; + } + return getLowerBucketBoundary(nextIndex, scale); + } + + public static double getLowerBucketBoundary(long index, int scale) { + // TODO: handle numeric limits, implement exact algorithms with 128 bit precision + double inverseFactor = Math.pow(2, -scale); + return Math.pow(2, inverseFactor * index); + } + + public static double getPointOfLeastRelativeError(long bucketIndex, int scale) { + // TODO: handle numeric limits, implement exact algorithms with 128 bit precision + double inverseFactor = Math.pow(2, -scale); + return Math.pow(2, inverseFactor * (bucketIndex + 1/3.0)); + } + + /** + * Compute the index for the given value. + * + *

The algorithm to retrieve the index is specified in the OpenTelemetry + * specification. + * + * @param value Measured value (must be non-zero). + * @return the index of the bucket which the value maps to. + */ + static long computeIndex(double value, int scale) { + double absValue = Math.abs(value); + // For positive scales, compute the index by logarithm, which is simpler but may be + // inaccurate near bucket boundaries + if (scale > 0) { + return getIndexByLogarithm(absValue, scale); + } + // For scale zero, compute the exact index by extracting the exponent + if (scale == 0) { + return mapToIndexScaleZero(absValue); + } + // For negative scales, compute the exact index by extracting the exponent and shifting it to + // the right by -scale + return mapToIndexScaleZero(absValue) >> -scale; + } + + /** + * Compute the bucket index using a logarithm based approach. + * + * @see All + * Scales: Use the Logarithm Function + */ + private static long getIndexByLogarithm(double value, int scale) { + return (long) Math.ceil(Math.log(value) * computeScaleFactor(scale)) - 1; + } + + /** + * Compute the exact bucket index for scale zero by extracting the exponent. 
+ * + * @see Scale + * Zero: Extract the Exponent + */ + private static long mapToIndexScaleZero(double value) { + long rawBits = Double.doubleToLongBits(value); + long rawExponent = (rawBits & EXPONENT_BIT_MASK) >> SIGNIFICAND_WIDTH; + long rawSignificand = rawBits & SIGNIFICAND_BIT_MASK; + if (rawExponent == 0) { + rawExponent -= Long.numberOfLeadingZeros(rawSignificand - 1) - EXPONENT_WIDTH - 1; + } + int ieeeExponent = (int) (rawExponent - EXPONENT_BIAS); + if (rawSignificand == 0) { + return ieeeExponent - 1; + } + return ieeeExponent; + } + + private static double computeScaleFactor(int scale) { + return Math.scalb(LOG_BASE2_E, scale); + } +} diff --git a/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/FixedSizeExponentialHistogram.java b/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/FixedSizeExponentialHistogram.java new file mode 100644 index 0000000000000..f1b810eec2a8a --- /dev/null +++ b/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/FixedSizeExponentialHistogram.java @@ -0,0 +1,195 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. 
+ */ + +package org.elasticsearch.exponentialhistogram; + +import java.util.Arrays; + +public class FixedSizeExponentialHistogram implements ExponentialHistogramBuilder, ExponentialHistogram { + + // scale of 52 is the largest scale being able to represent the smallest and largest double numbers + // while giving a relative error less + public static final int DEFAULT_BUCKET_SCALE = 52; + + private final long[] bucketIndices; + private final long[] bucketCounts; + private int negativeBucketCount; + private int positiveBucketCount; + private int bucketScale; + + private ZeroBucket zeroBucket; + + public FixedSizeExponentialHistogram(int bucketCount) { + bucketCount = Math.max(bucketCount, 2); // we need at least two buckets, one for positive values, one for negative + bucketIndices = new long[bucketCount]; + bucketCounts = new long[bucketCount]; + reset(); + } + + void reset() { + setZeroBucket(ZeroBucket.minimalEmpty()); + resetBuckets(DEFAULT_BUCKET_SCALE); + } + + @Override + public void resetBuckets(int newScale) { + negativeBucketCount = 0; + positiveBucketCount = 0; + bucketScale = newScale; + } + + @Override + public ZeroBucket zeroBucket() { + return zeroBucket; + } + + @Override + public void setZeroBucket(ZeroBucket zeroBucket) { + this.zeroBucket = zeroBucket; + } + + @Override + public boolean tryAddBucket(long index, long count, boolean isPositive) { + if (isPositive == false && positiveBucketCount > 0) { + throw new IllegalArgumentException("Cannot add negative buckets after a positive bucket was added"); + } + int slot = negativeBucketCount + positiveBucketCount; + if (slot >= bucketCounts.length) { + return false; // no more space + } + bucketIndices[slot] = index; + bucketCounts[slot] = count; + if (isPositive) { + if (positiveBucketCount > 0 && bucketIndices[slot - 1] > index) { + throw new IllegalStateException("Buckets must be added in ascending index order!"); + } + positiveBucketCount++; + } else { + if (negativeBucketCount > 0 && 
bucketIndices[slot - 1] > index) { + throw new IllegalStateException("Buckets must be added in ascending index order!"); + } + negativeBucketCount++; + } + return true; + } + + private void addValue(double value) { + long idx = ExponentialHistogramUtils.computeIndex(value, scale()); + if (Math.abs(value) < zeroBucket.zeroThreshold()) { + throw new IllegalArgumentException("Cannot add zero values via this method"); + } + boolean success = tryAddBucket(idx, 1, value > 0); + if (!success) { + throw new IllegalArgumentException("Not enough buckets, failed to add value"); + } + } + + @Override + public int scale() { + return bucketScale; + } + + @Override + public BucketIterator negativeBuckets() { + return new BucketArrayIterator(0, negativeBucketCount); + } + + @Override + public long maximumBucketIndex() { + long maxIndex = Long.MIN_VALUE; + if (negativeBucketCount > 0) { + maxIndex = bucketIndices[negativeBucketCount - 1]; + } + if (positiveBucketCount > 0) { + maxIndex = Math.max(maxIndex, bucketIndices[negativeBucketCount + positiveBucketCount - 1]); + } + return maxIndex; + } + + @Override + public BucketIterator positiveBuckets() { + return new BucketArrayIterator(negativeBucketCount, negativeBucketCount + positiveBucketCount); + } + + private class BucketArrayIterator implements BucketIterator { + + int current; + final int limit; + + private BucketArrayIterator(int start, int limit) { + this.current = start; + this.limit = limit; + } + + @Override + public boolean hasNext() { + return current < limit; + } + + @Override + public long peekCount() { + if (hasNext() == false) { + throw new IllegalStateException("No more buckets"); + } + return bucketCounts[current]; + } + + @Override + public long peekIndex() { + if (hasNext() == false) { + throw new IllegalStateException("No more buckets"); + } + return bucketIndices[current]; + } + + @Override + public void advance() { + if (hasNext() == false) { + throw new IllegalStateException("No more buckets"); + } + 
current++; + } + + @Override + public int scale() { + return FixedSizeExponentialHistogram.this.scale(); + } + + @Override + public BucketIterator copy() { + return new BucketArrayIterator(current, limit); + } + } + + public static FixedSizeExponentialHistogram createForValues(double... values) { + FixedSizeExponentialHistogram result = new FixedSizeExponentialHistogram(values.length); + double[] copy = Arrays.copyOf(values, values.length); + Arrays.sort(copy); + + int negativeCount = 0; + int zeroCount = 0; + + for (double val : copy) { + if (val < 0) { + negativeCount++; + } else if (val == 0){ + zeroCount++; + } else { + break; + } + } + + for (int i = negativeCount - 1; i >= 0; i--) { + result.addValue(copy[i]); + } + result.setZeroBucket(ZeroBucket.minimalWithCount(zeroCount)); + for (int i = negativeCount + zeroCount; i < copy.length; i++) { + result.addValue(copy[i]); + } + return result; + } +} diff --git a/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/MergingBucketIterator.java b/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/MergingBucketIterator.java new file mode 100644 index 0000000000000..7c6317172b596 --- /dev/null +++ b/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/MergingBucketIterator.java @@ -0,0 +1,91 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. 
+ */ + +package org.elasticsearch.exponentialhistogram; + +public class MergingBucketIterator implements ExponentialHistogram.BucketIterator { + + private final ExponentialHistogram.BucketIterator itA; + private final ExponentialHistogram.BucketIterator itB; + + private boolean endReached; + private long currentIndex; + private long currentCount; + + public MergingBucketIterator(ExponentialHistogram.BucketIterator itA, ExponentialHistogram.BucketIterator itB, int targetScale) { + this.itA = new ScaleAdjustingBucketIterator(itA, targetScale); + this.itB = new ScaleAdjustingBucketIterator(itB, targetScale); + endReached = false; + advance(); + } + + @Override + public void advance() { + boolean hasNextA = itA.hasNext() ; + boolean hasNextB = itB.hasNext(); + endReached = hasNextA == false && hasNextB == false; + if (endReached) { + return; + } + long idxA = 0; + long idxB = 0; + if (hasNextA) { + idxA = itA.peekIndex(); + } + if (hasNextB) { + idxB = itB.peekIndex(); + } + + currentCount = 0; + boolean advanceA = hasNextA && (hasNextB == false || idxA <= idxB); + boolean advanceB = hasNextB && (hasNextA == false || idxB <= idxA); + if (advanceA) { + currentIndex = idxA; + currentCount += itA.peekCount(); + itA.advance(); + } + if (advanceB) { + currentIndex = idxB; + currentCount += itB.peekCount(); + itB.advance(); + } + } + + @Override + public boolean hasNext() { + return endReached == false; + } + + @Override + public long peekCount() { + assertEndNotReached(); + return currentCount; + } + + + @Override + public long peekIndex() { + assertEndNotReached(); + return currentIndex; + } + + @Override + public int scale() { + return itA.scale(); + } + + @Override + public ExponentialHistogram.BucketIterator copy() { + throw new UnsupportedOperationException(); + } + + private void assertEndNotReached() { + if (endReached) { + throw new IllegalStateException("No more buckets"); + } + } +} diff --git 
a/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ScaleAdjustingBucketIterator.java b/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ScaleAdjustingBucketIterator.java new file mode 100644 index 0000000000000..4db8f960ad2f3 --- /dev/null +++ b/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ScaleAdjustingBucketIterator.java @@ -0,0 +1,76 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +package org.elasticsearch.exponentialhistogram; + +import static org.elasticsearch.exponentialhistogram.ExponentialHistogramUtils.adjustScale; + +public class ScaleAdjustingBucketIterator implements ExponentialHistogram.BucketIterator { + + private final ExponentialHistogram.BucketIterator delegate; + private final int scaleAdjustment; + + private long currentIndex; + private long currentCount; + boolean hasNextValue; + + public ScaleAdjustingBucketIterator(ExponentialHistogram.BucketIterator delegate, int targetScale) { + this.delegate = delegate; + scaleAdjustment = targetScale - delegate.scale(); + hasNextValue = true; + advance(); + } + + @Override + public boolean hasNext() { + return hasNextValue; + } + + @Override + public long peekCount() { + assertEndNotReached(); + return currentCount; + } + + @Override + public long peekIndex() { + assertEndNotReached(); + return currentIndex; + } + + @Override + public void advance() { + assertEndNotReached(); + hasNextValue = delegate.hasNext(); + if (hasNextValue == false) { + return; + } + currentIndex = adjustScale(delegate.peekIndex(), scaleAdjustment); + currentCount = delegate.peekCount(); + delegate.advance(); + while (delegate.hasNext() && adjustScale(delegate.peekIndex(), scaleAdjustment) == currentIndex) { + currentCount 
+= delegate.peekCount(); + delegate.advance(); + } + } + + private void assertEndNotReached() { + if (hasNextValue == false) { + throw new IllegalStateException("no more buckets available"); + } + } + + @Override + public int scale() { + return delegate.scale() + scaleAdjustment; + } + + @Override + public ExponentialHistogram.BucketIterator copy() { + throw new UnsupportedOperationException(); + } +} diff --git a/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ZeroBucket.java b/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ZeroBucket.java new file mode 100644 index 0000000000000..671f241ee7f14 --- /dev/null +++ b/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ZeroBucket.java @@ -0,0 +1,98 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. 
+ */ + +package org.elasticsearch.exponentialhistogram; + +import static org.elasticsearch.exponentialhistogram.ExponentialHistogramUtils.compareLowerBoundaries; +import static org.elasticsearch.exponentialhistogram.ExponentialHistogramUtils.getLowerBucketBoundary; +import static org.elasticsearch.exponentialhistogram.ExponentialHistogramUtils.computeIndex; +import static org.elasticsearch.exponentialhistogram.FixedSizeExponentialHistogram.DEFAULT_BUCKET_SCALE; + +public record ZeroBucket(long index, int scale, long count) { + + private static final ZeroBucket MINIMAL_EMPTY = new ZeroBucket(Long.MIN_VALUE, Integer.MIN_VALUE/256, 0); + + public ZeroBucket(long index, int scale) { + this(index, scale, 0); + } + + public ZeroBucket(double zeroThreshold, long count) { + this(computeIndex(zeroThreshold, DEFAULT_BUCKET_SCALE) + 1, DEFAULT_BUCKET_SCALE, count); + } + + public static ZeroBucket minimalEmpty() { + return MINIMAL_EMPTY; + } + + public static ZeroBucket minimalWithCount(long count) { + if (count == 0) { + return MINIMAL_EMPTY; + } else { + return new ZeroBucket(MINIMAL_EMPTY.index, MINIMAL_EMPTY.scale(), count); + } + } + + /** + * Merges this zero-bucket with a given other one: + * * If the other zero-bucket is empty, the current one is returned unchanged + * * Otherwise the zero-threshold is increased if required and the counts are summed up + */ + public ZeroBucket merge(ZeroBucket other) { + if (other.count == 0) { + return this; + } else { + long totalCount = count + other.count; + // both are populated, we need to use the higher zero-threshold + if (this.compareZeroThreshold(other) >= 0) { + return new ZeroBucket(index, scale, totalCount); + } else { + return new ZeroBucket(other.index, other.scale, totalCount); + } + } + } + + public ZeroBucket collapseOverlappingBuckets(ExponentialHistogram.BucketIterator...
bucketIterators) { + ZeroBucket current = this; + ZeroBucket previous; + do { + previous = current; + for (ExponentialHistogram.BucketIterator buckets: bucketIterators) { + current = current.collapseOverlappingBuckets(buckets); + } + } while (previous.compareZeroThreshold(current) != 0); + return current; + } + + public int compareZeroThreshold(ZeroBucket other) { + return compareLowerBoundaries(index, scale, other.index, other.scale); + } + + public double zeroThreshold() { + return getLowerBucketBoundary(index, scale); + } + + /** + * Collapses all buckets from the given iterator whose lower boundary is smaller than the zero threshold. + * The iterator is advanced to point at the first, non-collapsed bucket. + */ + public ZeroBucket collapseOverlappingBuckets(ExponentialHistogram.BucketIterator buckets) { + + long collapsedCount = 0; + long highestCollapsedIndex = 0; + while (buckets.hasNext() && compareLowerBoundaries(buckets.peekIndex(), buckets.scale(), index, scale) < 0) { + highestCollapsedIndex = buckets.peekIndex(); + collapsedCount += buckets.peekCount(); + buckets.advance(); + } + if (collapsedCount == 0) { + return this; + } else { + // +1 because we need to adjust the zero threshold to the upper boundary of the collapsed bucket + return new ZeroBucket(highestCollapsedIndex + 1, buckets.scale(), count + collapsedCount); + } + } +} diff --git a/libs/exponential-histogram/src/test/java/org/elasticsearch/exponentialhistogram/DownscaleStatsTest.java b/libs/exponential-histogram/src/test/java/org/elasticsearch/exponentialhistogram/DownscaleStatsTest.java new file mode 100644 index 0000000000000..15817272ab00f --- /dev/null +++ b/libs/exponential-histogram/src/test/java/org/elasticsearch/exponentialhistogram/DownscaleStatsTest.java @@ -0,0 +1,77 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. 
Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +package org.elasticsearch.exponentialhistogram; + +import org.junit.Test; + +import java.util.ArrayList; +import java.util.Collection; +import java.util.HashSet; +import java.util.List; +import java.util.Random; +import java.util.stream.IntStream; +import java.util.stream.LongStream; + +import static org.hamcrest.MatcherAssert.assertThat; +import static org.hamcrest.Matchers.equalTo; + +public class DownscaleStatsTest { + + @Test + public void exponential() { + long[] values = IntStream.range(0, 100).mapToLong(i -> (long) Math.min(Integer.MAX_VALUE, Math.pow(1.1, i))).distinct().toArray(); + verifyFor(values); + } + + @Test + public void numericalLimits() { + verifyFor(Long.MIN_VALUE, Long.MAX_VALUE); + } + + @Test + public void random() { + Random rnd = new Random(42); + + for (int i = 0; i < 100; i++) { + List values = IntStream.range(0, 10_000).mapToObj(j -> rnd.nextLong()).distinct().toList(); + verifyFor(values); + } + } + + void verifyFor(long... 
indices) { + verifyFor(LongStream.of(indices).boxed().toList()); + } + + void verifyFor(Collection indices) { + // sanity check, we require unique indices + assertThat(indices.size(), equalTo(new HashSet<>(indices).size())); + + List sorted = new ArrayList<>(indices); + sorted.sort(Long::compareTo); + + DownscaleStats stats = new DownscaleStats(); + for (int i = 1; i < sorted.size(); i++) { + long prev = sorted.get(i - 1); + long curr = sorted.get(i); + stats.add(prev, curr); + } + + for (int i = 0; i < 64; i++) { + int scaleReduction = i; + long remainingCount = indices.stream().mapToLong(Long::longValue).map(index -> index >> scaleReduction).distinct().count(); + long reduction = sorted.size() - remainingCount; + + assertThat( + "Expected size after reduction of " + i + " to match", + stats.getCollapsedBucketCountAfterScaleReduction(scaleReduction), + equalTo((int) reduction) + ); + } + + } +} diff --git a/libs/exponential-histogram/src/test/java/org/elasticsearch/exponentialhistogram/ExponentialHistogramUtilsTest.java b/libs/exponential-histogram/src/test/java/org/elasticsearch/exponentialhistogram/ExponentialHistogramUtilsTest.java new file mode 100644 index 0000000000000..9fee2e91b7fb6 --- /dev/null +++ b/libs/exponential-histogram/src/test/java/org/elasticsearch/exponentialhistogram/ExponentialHistogramUtilsTest.java @@ -0,0 +1,116 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. 
+ */ + +package org.elasticsearch.exponentialhistogram; + +import org.junit.Test; + +import java.util.Random; + +import static org.elasticsearch.exponentialhistogram.ExponentialHistogramUtils.adjustScale; +import static org.elasticsearch.exponentialhistogram.ExponentialHistogramUtils.compareLowerBoundaries; +import static org.elasticsearch.exponentialhistogram.ExponentialHistogramUtils.getLowerBucketBoundary; +import static org.elasticsearch.exponentialhistogram.ExponentialHistogramUtils.getMaximumScaleIncrease; +import static org.elasticsearch.exponentialhistogram.ExponentialHistogramUtils.getUpperBucketBoundary; +import static org.hamcrest.MatcherAssert.assertThat; +import static org.hamcrest.Matchers.equalTo; +import static org.hamcrest.Matchers.lessThanOrEqualTo; +import static org.junit.Assert.assertThrows; + +public class ExponentialHistogramUtilsTest { + + @Test + public void testMaxValue() { + assertThat(getMaximumScaleIncrease(Long.MAX_VALUE), equalTo(0)); + assertThat(getMaximumScaleIncrease(Long.MAX_VALUE >> 1), equalTo(1)); + + assertThat(adjustScale(Long.MAX_VALUE, 0), equalTo(Long.MAX_VALUE)); + assertThat(adjustScale(Long.MAX_VALUE >> 1, 1), equalTo(Long.MAX_VALUE - 1)); + assertThat(adjustScale(Long.MAX_VALUE >> 2, 2), equalTo((Long.MAX_VALUE & ~3) + 1)); + assertThat(adjustScale(Long.MAX_VALUE >> 4, 4), equalTo((Long.MAX_VALUE & ~15) + 6)); + } + + @Test + public void testMinValue() { + assertThat(getMaximumScaleIncrease(Long.MIN_VALUE), equalTo(0)); + assertThat(getMaximumScaleIncrease(Long.MIN_VALUE >> 1), equalTo(1)); + + assertThat(adjustScale(Long.MIN_VALUE, 0), equalTo(Long.MIN_VALUE)); + assertThat(adjustScale(Long.MIN_VALUE >> 1, 1), equalTo(Long.MIN_VALUE)); + assertThat(adjustScale(Long.MIN_VALUE >> 2, 2), equalTo((Long.MIN_VALUE & ~3) + 1)); + assertThat(adjustScale(Long.MIN_VALUE >> 4, 4), equalTo((Long.MIN_VALUE & ~15) + 6)); + } + + @Test + public void testRandom() { + Random rnd = new Random(42); + + for (int i=0; i<100_000; i++) { + 
long index = rnd.nextLong(); + int maxScale = getMaximumScaleIncrease(index); + + assertThat(adjustScale(adjustScale(index, maxScale), -maxScale), equalTo(index)); + assertThrows(ArithmeticException.class, () -> Math.multiplyExact(adjustScale(index, maxScale), 2)); + } + + } + + + @Test + public void randomComparison() { + Random rnd = new Random(42); + + for (int i=0; i<100_000; i++) { + long indexA = rnd.nextLong(); + long indexB = rnd.nextLong(); + int scaleA = rnd.nextInt() % 40; + int scaleB = rnd.nextInt() % 40; + + + double lowerBoundA = getLowerBucketBoundary(indexA, scaleA); + while (Double.isInfinite(lowerBoundA)) { + indexA = indexA >> 1; + lowerBoundA = getLowerBucketBoundary(indexA, scaleA); + } + double lowerBoundB = getLowerBucketBoundary(indexB, scaleB); + while (Double.isInfinite(lowerBoundB)) { + indexB = indexB >> 1; + lowerBoundB = getLowerBucketBoundary(indexB, scaleB); + } + + if (lowerBoundA != lowerBoundB) { + System.out.println("Comparing "+lowerBoundA+" to "+lowerBoundB); + assertThat( + Double.compare(lowerBoundA, lowerBoundB), equalTo(compareLowerBoundaries(indexA, scaleA, indexB, scaleB))); + } + } + + } + + @Test + public void testScalingUpToMidpoint() { + long midpointIndex = adjustScale(0, 64); + double lowerBoundary = getLowerBucketBoundary(midpointIndex, 64); + double upperBoundary = getUpperBucketBoundary(midpointIndex, 64); + + // due to limited double-float precision the results are actually exact + assertThat(lowerBoundary, equalTo(4.0/3.0)); + assertThat(upperBoundary, equalTo(4.0/3.0)); + } + + @Test + public void testSaneBucketBoundaries() { + assertThat(getLowerBucketBoundary(0, 42), equalTo(1.0)); + assertThat(getLowerBucketBoundary(1, 0), equalTo(2.0)); + assertThat(getLowerBucketBoundary(1, -1), equalTo(4.0)); + assertThat(getLowerBucketBoundary(1, -2), equalTo(16.0)); + + double limit1 = getLowerBucketBoundary(Long.MAX_VALUE-1, 56); + double limit2 = getLowerBucketBoundary(Long.MAX_VALUE, 56); + assertThat(limit1, 
lessThanOrEqualTo(limit2)); + } +} diff --git a/libs/exponential-histogram/src/test/java/org/elasticsearch/exponentialhistogram/FixedSizeExponentialHistogramTest.java b/libs/exponential-histogram/src/test/java/org/elasticsearch/exponentialhistogram/FixedSizeExponentialHistogramTest.java new file mode 100644 index 0000000000000..78723e6c5cce4 --- /dev/null +++ b/libs/exponential-histogram/src/test/java/org/elasticsearch/exponentialhistogram/FixedSizeExponentialHistogramTest.java @@ -0,0 +1,71 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +package org.elasticsearch.exponentialhistogram; + +import org.junit.Test; + +import java.util.stream.LongStream; + +public class FixedSizeExponentialHistogramTest { + + + @Test + public void testPrintBuckets() { + ExponentialHistogram first = FixedSizeExponentialHistogram.createForValues(0.01234, 42, 56789); + ExponentialHistogram second = FixedSizeExponentialHistogram.createForValues(38, 50, 250, 257, 10001.1234); + FixedSizeExponentialHistogram result = new FixedSizeExponentialHistogram(7); + ExponentialHistogramMerger.merge(result, first, second); + printMidpoints(result); + } + + + @Test + public void testPrintBucketsLinearScale() { + FixedSizeExponentialHistogram akkumulator = new FixedSizeExponentialHistogram(1000); + // akkumulator.setZeroBucket(new ZeroBucket(100.0, 42)); + + for (int i=0; i<2_000; i++) { + ExponentialHistogram prev = akkumulator; + double[] values = LongStream.range(i * 1000, (i + 1) * 1000).mapToDouble(val -> -val).toArray(); + akkumulator = new FixedSizeExponentialHistogram(10000); + ExponentialHistogram toMerge = FixedSizeExponentialHistogram.createForValues(values); + ExponentialHistogramMerger.merge(akkumulator, prev, toMerge); + } + + double smallPerc = 
ExpHistoPercentiles.getPercentile(akkumulator, 0.00001); + double highPerc = ExpHistoPercentiles.getPercentile(akkumulator, 0.9999); + double median = ExpHistoPercentiles.getPercentile(akkumulator, 0.5); + + printMidpoints(akkumulator); + } + + private static void printMidpoints(ExponentialHistogram histo) { + StringBuilder sb = new StringBuilder("{ base : "); + sb.append(ExponentialHistogramUtils.getLowerBucketBoundary(1, histo.scale())).append(", "); + ExponentialHistogram.BucketIterator neg = histo.negativeBuckets(); + while (neg.hasNext()) { + long idx = neg.peekIndex(); + long count = neg.peekCount(); + double center = -ExponentialHistogramUtils.getPointOfLeastRelativeError(idx, neg.scale()); + sb.append(center).append(":").append(count).append(", "); + neg.advance(); + } + sb.append("0.0 : ").append(histo.zeroBucket().count()); + ExponentialHistogram.BucketIterator pos = histo.positiveBuckets(); + while (pos.hasNext()) { + long idx = pos.peekIndex(); + long count = pos.peekCount(); + double center = ExponentialHistogramUtils.getPointOfLeastRelativeError(idx, pos.scale()); + sb.append(", ").append(center).append(":").append(count); + pos.advance(); + } + sb.append('}'); + System.out.println(sb); + + } +} From da159a90c152244c43e1ae51c951fe667df194c8 Mon Sep 17 00:00:00 2001 From: Jonas Kunz Date: Tue, 8 Jul 2025 11:40:11 +0200 Subject: [PATCH 02/32] Clean up generator --- .../ExpHistoGenerator.java | 116 ++++++++++++++++++ .../ExponentialHistogramMerger.java | 4 + .../FixedSizeExponentialHistogram.java | 38 ------ .../FixedSizeExponentialHistogramTest.java | 26 ++-- 4 files changed, 131 insertions(+), 53 deletions(-) create mode 100644 libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ExpHistoGenerator.java diff --git a/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ExpHistoGenerator.java b/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ExpHistoGenerator.java new 
file mode 100644 index 0000000000000..00097cb852e93 --- /dev/null +++ b/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ExpHistoGenerator.java @@ -0,0 +1,116 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the "Elastic License + * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side + * Public License v 1"; you may not use this file except in compliance with, at + * your election, the "Elastic License 2.0", the "GNU Affero General Public + * License v3.0 only", or the "Server Side Public License, v 1". + */ + +package org.elasticsearch.exponentialhistogram; + +import java.util.Arrays; +import java.util.stream.DoubleStream; + +import static org.elasticsearch.exponentialhistogram.ExponentialHistogramUtils.computeIndex; + +/** + * Class for generating a histogram from raw values. + */ +public class ExpHistoGenerator { + + private final double[] rawValueBuffer; + int valueCount; + + private boolean isFinished; + + private FixedSizeExponentialHistogram result; + private FixedSizeExponentialHistogram mergeBuffer; + private FixedSizeExponentialHistogram valueBuffer; + + + public ExpHistoGenerator(int numBuckets) { + rawValueBuffer = new double[numBuckets]; + valueCount = 0; + result = new FixedSizeExponentialHistogram(numBuckets); + mergeBuffer = new FixedSizeExponentialHistogram(numBuckets); + valueBuffer = new FixedSizeExponentialHistogram(numBuckets); + } + + public void add(double value) { + if (isFinished) { + throw new IllegalStateException("get() has already been called"); + } + if (valueCount == rawValueBuffer.length) { + mergeValuesToHistogram(); + } + rawValueBuffer[valueCount] = value; + valueCount++; + } + + public ExponentialHistogram get() { + if (isFinished) { + throw new IllegalStateException("get() has already been called"); + } + isFinished = true; + mergeValuesToHistogram(); + return result; + } + + 
public static ExponentialHistogram createFor(double... values) { + return createFor(values.length, Arrays.stream(values)); + } + + public static ExponentialHistogram createFor(int bucketCount, DoubleStream values) { + ExpHistoGenerator generator = new ExpHistoGenerator(bucketCount); + values.forEach(generator::add); + return generator.get(); + } + + private void mergeValuesToHistogram() { + if (valueCount == 0) { + return; + } + Arrays.sort(rawValueBuffer, 0, valueCount); + int negativeValuesCount = 0; + while (negativeValuesCount < valueCount && rawValueBuffer[negativeValuesCount] < 0) { + negativeValuesCount++; + } + + valueBuffer.reset(); + int scale = valueBuffer.scale(); + + for (int i = negativeValuesCount - 1; i >= 0; i--) { + long count = 1; + long index = computeIndex(rawValueBuffer[i], scale); + while ((i-1) >= 0 && computeIndex(rawValueBuffer[i-1] , scale) == index) { + i--; + count++; + } + valueBuffer.tryAddBucket(index, count, false); + } + + int zeroCount = 0; + while((negativeValuesCount + zeroCount) < valueCount && rawValueBuffer[negativeValuesCount+zeroCount] == 0) { + zeroCount++; + } + valueBuffer.setZeroBucket(ZeroBucket.minimalWithCount(zeroCount)); + for (int i= negativeValuesCount + zeroCount; i < valueCount; i++) { + long count = 1; + long index = computeIndex(rawValueBuffer[i], scale); + while ((i+1) < valueCount && computeIndex(rawValueBuffer[i+1] , scale) == index) { + i++; + count++; + } + valueBuffer.tryAddBucket(index, count, true); + } + valueCount = 0; + + ExponentialHistogramMerger.merge(mergeBuffer, result, valueBuffer); + FixedSizeExponentialHistogram temp = result; + result = mergeBuffer; + mergeBuffer = temp; + } + + +} diff --git a/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ExponentialHistogramMerger.java b/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ExponentialHistogramMerger.java index 51ca037bdc742..5e5abaffe9a40 100644 --- 
a/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ExponentialHistogramMerger.java +++ b/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ExponentialHistogramMerger.java @@ -7,6 +7,10 @@ package org.elasticsearch.exponentialhistogram; +import org.elasticsearch.core.Nullable; + +import static org.elasticsearch.exponentialhistogram.ExponentialHistogramUtils.getMaximumScaleIncrease; + public class ExponentialHistogramMerger { diff --git a/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/FixedSizeExponentialHistogram.java b/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/FixedSizeExponentialHistogram.java index f1b810eec2a8a..13cb5aa5169a4 100644 --- a/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/FixedSizeExponentialHistogram.java +++ b/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/FixedSizeExponentialHistogram.java @@ -77,17 +77,6 @@ public boolean tryAddBucket(long index, long count, boolean isPositive) { return true; } - private void addValue(double value) { - long idx = ExponentialHistogramUtils.computeIndex(value, scale()); - if (Math.abs(value) < zeroBucket.zeroThreshold()) { - throw new IllegalArgumentException("Cannot add zero values via this method"); - } - boolean success = tryAddBucket(idx, 1, value > 0); - if (!success) { - throw new IllegalArgumentException("Not enough buckets, failed to add value"); - } - } - @Override public int scale() { return bucketScale; @@ -165,31 +154,4 @@ public BucketIterator copy() { } } - public static FixedSizeExponentialHistogram createForValues(double... 
values) { - FixedSizeExponentialHistogram result = new FixedSizeExponentialHistogram(values.length); - double[] copy = Arrays.copyOf(values, values.length); - Arrays.sort(copy); - - int negativeCount = 0; - int zeroCount = 0; - - for (double val : copy) { - if (val < 0) { - negativeCount++; - } else if (val == 0){ - zeroCount++; - } else { - break; - } - } - - for (int i = negativeCount - 1; i >= 0; i--) { - result.addValue(copy[i]); - } - result.setZeroBucket(ZeroBucket.minimalWithCount(zeroCount)); - for (int i = negativeCount + zeroCount; i < copy.length; i++) { - result.addValue(copy[i]); - } - return result; - } } diff --git a/libs/exponential-histogram/src/test/java/org/elasticsearch/exponentialhistogram/FixedSizeExponentialHistogramTest.java b/libs/exponential-histogram/src/test/java/org/elasticsearch/exponentialhistogram/FixedSizeExponentialHistogramTest.java index 78723e6c5cce4..f15d74498799f 100644 --- a/libs/exponential-histogram/src/test/java/org/elasticsearch/exponentialhistogram/FixedSizeExponentialHistogramTest.java +++ b/libs/exponential-histogram/src/test/java/org/elasticsearch/exponentialhistogram/FixedSizeExponentialHistogramTest.java @@ -9,6 +9,7 @@ import org.junit.Test; +import java.util.stream.IntStream; import java.util.stream.LongStream; public class FixedSizeExponentialHistogramTest { @@ -16,8 +17,8 @@ public class FixedSizeExponentialHistogramTest { @Test public void testPrintBuckets() { - ExponentialHistogram first = FixedSizeExponentialHistogram.createForValues(0.01234, 42, 56789); - ExponentialHistogram second = FixedSizeExponentialHistogram.createForValues(38, 50, 250, 257, 10001.1234); + ExponentialHistogram first = ExpHistoGenerator.createFor(0.01234, 42, 56789); + ExponentialHistogram second = ExpHistoGenerator.createFor(38, 50, 250, 257, 10001.1234); FixedSizeExponentialHistogram result = new FixedSizeExponentialHistogram(7); ExponentialHistogramMerger.merge(result, first, second); printMidpoints(result); @@ -26,22 +27,17 @@ 
public void testPrintBuckets() { @Test public void testPrintBucketsLinearScale() { - FixedSizeExponentialHistogram akkumulator = new FixedSizeExponentialHistogram(1000); - // akkumulator.setZeroBucket(new ZeroBucket(100.0, 42)); - for (int i=0; i<2_000; i++) { - ExponentialHistogram prev = akkumulator; - double[] values = LongStream.range(i * 1000, (i + 1) * 1000).mapToDouble(val -> -val).toArray(); - akkumulator = new FixedSizeExponentialHistogram(10000); - ExponentialHistogram toMerge = FixedSizeExponentialHistogram.createForValues(values); - ExponentialHistogramMerger.merge(akkumulator, prev, toMerge); - } + ExponentialHistogram result = ExpHistoGenerator.createFor( + 1000, + IntStream.range(-1_000_000, 2_000_000).mapToDouble(Double::valueOf) + ); - double smallPerc = ExpHistoPercentiles.getPercentile(akkumulator, 0.00001); - double highPerc = ExpHistoPercentiles.getPercentile(akkumulator, 0.9999); - double median = ExpHistoPercentiles.getPercentile(akkumulator, 0.5); + double smallPerc = ExpHistoPercentiles.getPercentile(result, 0.00001); + double highPerc = ExpHistoPercentiles.getPercentile(result, 0.9999); + double median = ExpHistoPercentiles.getPercentile(result, 0.5); - printMidpoints(akkumulator); + printMidpoints(result); } private static void printMidpoints(ExponentialHistogram histo) { From fa4efe06bbf09fb7957754a8b108d7b31cb42882 Mon Sep 17 00:00:00 2001 From: Jonas Kunz Date: Tue, 8 Jul 2025 11:56:26 +0200 Subject: [PATCH 03/32] Clean up merger --- .../ExpHistoGenerator.java | 20 +++---- .../ExponentialHistogramMerger.java | 54 ++++++++++++++++--- .../FixedSizeExponentialHistogramTest.java | 5 +- 3 files changed, 57 insertions(+), 22 deletions(-) diff --git a/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ExpHistoGenerator.java b/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ExpHistoGenerator.java index 00097cb852e93..3e72ccbbbbdfc 100644 --- 
a/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ExpHistoGenerator.java +++ b/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ExpHistoGenerator.java @@ -22,19 +22,16 @@ public class ExpHistoGenerator { private final double[] rawValueBuffer; int valueCount; - private boolean isFinished; - - private FixedSizeExponentialHistogram result; - private FixedSizeExponentialHistogram mergeBuffer; - private FixedSizeExponentialHistogram valueBuffer; + private final ExponentialHistogramMerger resultMerger; + private final FixedSizeExponentialHistogram valueBuffer; + private boolean isFinished = false; public ExpHistoGenerator(int numBuckets) { rawValueBuffer = new double[numBuckets]; valueCount = 0; - result = new FixedSizeExponentialHistogram(numBuckets); - mergeBuffer = new FixedSizeExponentialHistogram(numBuckets); valueBuffer = new FixedSizeExponentialHistogram(numBuckets); + resultMerger = new ExponentialHistogramMerger(numBuckets); } public void add(double value) { @@ -54,7 +51,7 @@ public ExponentialHistogram get() { } isFinished = true; mergeValuesToHistogram(); - return result; + return resultMerger.get(); } public static ExponentialHistogram createFor(double... 
values) { @@ -104,12 +101,9 @@ private void mergeValuesToHistogram() { } valueBuffer.tryAddBucket(index, count, true); } - valueCount = 0; - ExponentialHistogramMerger.merge(mergeBuffer, result, valueBuffer); - FixedSizeExponentialHistogram temp = result; - result = mergeBuffer; - mergeBuffer = temp; + resultMerger.add(valueBuffer); + valueCount = 0; } diff --git a/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ExponentialHistogramMerger.java b/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ExponentialHistogramMerger.java index 5e5abaffe9a40..5ffe1c3f1a1c4 100644 --- a/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ExponentialHistogramMerger.java +++ b/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ExponentialHistogramMerger.java @@ -9,14 +9,54 @@ import org.elasticsearch.core.Nullable; +import java.util.Arrays; +import java.util.stream.Stream; + import static org.elasticsearch.exponentialhistogram.ExponentialHistogramUtils.getMaximumScaleIncrease; public class ExponentialHistogramMerger { + FixedSizeExponentialHistogram result; + FixedSizeExponentialHistogram buffer; + + private boolean isFinished; + + public ExponentialHistogramMerger(int resultBucketCount) { + result = new FixedSizeExponentialHistogram(resultBucketCount); + buffer = new FixedSizeExponentialHistogram(resultBucketCount); + } + + public void add(ExponentialHistogram toAdd) { + if (isFinished) { + throw new IllegalStateException("get() has already been called"); + } + merge(buffer, result, toAdd); + FixedSizeExponentialHistogram temp = result; + result = buffer; + buffer = temp; + } + + public ExponentialHistogram get() { + if (isFinished) { + throw new IllegalStateException("get() has already been called"); + } + isFinished = true; + return result; + } + + public static ExponentialHistogram merge(int bucketCount, ExponentialHistogram... 
histograms) { + return merge(bucketCount, Arrays.stream(histograms)); + } + + public static ExponentialHistogram merge(int bucketCount, Stream histograms) { + ExponentialHistogramMerger merger = new ExponentialHistogramMerger(bucketCount); + histograms.forEach(merger::add); + return merger.get(); + } + // TODO: make this more efficient in case b is much smaller than a - public static void merge(ExponentialHistogramBuilder output, ExponentialHistogram a, ExponentialHistogram b) { - //first step: generate new zero threshold + private static void merge(ExponentialHistogramBuilder output, ExponentialHistogram a, ExponentialHistogram b) { ExponentialHistogram.BucketIterator posBucketsA = a.positiveBuckets(); ExponentialHistogram.BucketIterator negBucketsA = a.negativeBuckets(); ExponentialHistogram.BucketIterator posBucketsB = b.positiveBuckets(); @@ -32,16 +72,18 @@ public static void merge(ExponentialHistogramBuilder output, ExponentialHistogra // we need to make sure to not exceed the numeric limits (64 bit) for those in this case int targetScale = a.scale(); if (targetScale > b.scale()) { - long highestIndex = b.maximumBucketIndex(); - if (highestIndex > Long.MIN_VALUE) { - targetScale = Math.min(targetScale, b.scale() + getMaximumScaleIncrease(highestIndex)); - } + boolean isNonEmpty = false; if (posBucketsB.hasNext()) { + isNonEmpty = true; targetScale = Math.min(targetScale, b.scale() + getMaximumScaleIncrease(posBucketsB.peekIndex())); } if (negBucketsB.hasNext()) { + isNonEmpty = true; targetScale = Math.min(targetScale, b.scale() + getMaximumScaleIncrease(negBucketsB.peekIndex())); } + if (isNonEmpty) { + targetScale = Math.min(targetScale, b.scale() + getMaximumScaleIncrease( b.maximumBucketIndex())); + } } // Now we are sure that everything fits numerically into targetScale diff --git a/libs/exponential-histogram/src/test/java/org/elasticsearch/exponentialhistogram/FixedSizeExponentialHistogramTest.java 
b/libs/exponential-histogram/src/test/java/org/elasticsearch/exponentialhistogram/FixedSizeExponentialHistogramTest.java index f15d74498799f..4806b230876e6 100644 --- a/libs/exponential-histogram/src/test/java/org/elasticsearch/exponentialhistogram/FixedSizeExponentialHistogramTest.java +++ b/libs/exponential-histogram/src/test/java/org/elasticsearch/exponentialhistogram/FixedSizeExponentialHistogramTest.java @@ -10,7 +10,6 @@ import org.junit.Test; import java.util.stream.IntStream; -import java.util.stream.LongStream; public class FixedSizeExponentialHistogramTest { @@ -19,8 +18,8 @@ public class FixedSizeExponentialHistogramTest { public void testPrintBuckets() { ExponentialHistogram first = ExpHistoGenerator.createFor(0.01234, 42, 56789); ExponentialHistogram second = ExpHistoGenerator.createFor(38, 50, 250, 257, 10001.1234); - FixedSizeExponentialHistogram result = new FixedSizeExponentialHistogram(7); - ExponentialHistogramMerger.merge(result, first, second); + + ExponentialHistogram result = ExponentialHistogramMerger.merge(7, first, second); printMidpoints(result); } From fba967f8a9a5e369f8317c0b7d9765a26a65f090 Mon Sep 17 00:00:00 2001 From: Jonas Kunz Date: Wed, 9 Jul 2025 11:39:16 +0200 Subject: [PATCH 04/32] More tests, a bit of cleanup --- libs/exponential-histogram/build.gradle | 1 + ...ava => ExponentialHistogramGenerator.java} | 6 +- .../ExponentialHistogramMerger.java | 7 + .../exponentialhistogram/ZeroBucket.java | 13 +- .../ExponentialHistogramMergerTest.java | 139 ++++++++ .../FixedSizeExponentialHistogramTest.java | 8 +- .../PercentileAccuracyTest.java | 307 ++++++++++++++++++ 7 files changed, 469 insertions(+), 12 deletions(-) rename libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/{ExpHistoGenerator.java => ExponentialHistogramGenerator.java} (94%) create mode 100644 libs/exponential-histogram/src/test/java/org/elasticsearch/exponentialhistogram/ExponentialHistogramMergerTest.java create mode 100644 
libs/exponential-histogram/src/test/java/org/elasticsearch/exponentialhistogram/PercentileAccuracyTest.java diff --git a/libs/exponential-histogram/build.gradle b/libs/exponential-histogram/build.gradle index 50bf5d90a7da0..72cb0a1ba6261 100644 --- a/libs/exponential-histogram/build.gradle +++ b/libs/exponential-histogram/build.gradle @@ -14,4 +14,5 @@ dependencies { compileOnly project(':libs:core') //just for @Nullable annotations testImplementation(project(":test:framework")) + testImplementation('org.apache.commons:commons-math3:3.6.1') } diff --git a/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ExpHistoGenerator.java b/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ExponentialHistogramGenerator.java similarity index 94% rename from libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ExpHistoGenerator.java rename to libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ExponentialHistogramGenerator.java index 3e72ccbbbbdfc..e74d36af58400 100644 --- a/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ExpHistoGenerator.java +++ b/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ExponentialHistogramGenerator.java @@ -17,7 +17,7 @@ /** * Class for generating a histogram from raw values. */ -public class ExpHistoGenerator { +public class ExponentialHistogramGenerator { private final double[] rawValueBuffer; int valueCount; @@ -27,7 +27,7 @@ public class ExpHistoGenerator { private boolean isFinished = false; - public ExpHistoGenerator(int numBuckets) { + public ExponentialHistogramGenerator(int numBuckets) { rawValueBuffer = new double[numBuckets]; valueCount = 0; valueBuffer = new FixedSizeExponentialHistogram(numBuckets); @@ -59,7 +59,7 @@ public static ExponentialHistogram createFor(double... 
values) { } public static ExponentialHistogram createFor(int bucketCount, DoubleStream values) { - ExpHistoGenerator generator = new ExpHistoGenerator(bucketCount); + ExponentialHistogramGenerator generator = new ExponentialHistogramGenerator(bucketCount); values.forEach(generator::add); return generator.get(); } diff --git a/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ExponentialHistogramMerger.java b/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ExponentialHistogramMerger.java index 5ffe1c3f1a1c4..b5cb8da1e4fa6 100644 --- a/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ExponentialHistogramMerger.java +++ b/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ExponentialHistogramMerger.java @@ -26,6 +26,13 @@ public ExponentialHistogramMerger(int resultBucketCount) { buffer = new FixedSizeExponentialHistogram(resultBucketCount); } + // Only inteded for testing, using this in production means an unnecessary reduction of precision + ExponentialHistogramMerger(int resultBucketCount, int minScale) { + this(resultBucketCount); + result.resetBuckets(minScale); + buffer.resetBuckets(minScale); + } + public void add(ExponentialHistogram toAdd) { if (isFinished) { throw new IllegalStateException("get() has already been called"); diff --git a/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ZeroBucket.java b/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ZeroBucket.java index 671f241ee7f14..d1d370e19a8d1 100644 --- a/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ZeroBucket.java +++ b/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ZeroBucket.java @@ -16,10 +16,6 @@ public record ZeroBucket(long index, int scale, long count) { private static final ZeroBucket MINIMAL_EMPTY = new ZeroBucket(Long.MIN_VALUE, 
Integer.MIN_VALUE/256, 0); - public ZeroBucket(long index, int scale) { - this(index, scale, 0); - } - public ZeroBucket(double zeroThreshold, long count) { this(computeIndex(zeroThreshold, DEFAULT_BUCKET_SCALE) + 1, DEFAULT_BUCKET_SCALE, count); } @@ -91,8 +87,15 @@ public ZeroBucket collapseOverlappingBuckets(ExponentialHistogram.BucketIterator if (collapsedCount == 0) { return this; } else { + long newZeroCount = count + collapsedCount; // +1 because we need to adjust the zero threshold to the upper boundary of the collapsed bucket - return new ZeroBucket(highestCollapsedIndex + 1, buckets.scale(), count + collapsedCount); + long collapsedUpperBoundIndex = Math.addExact(highestCollapsedIndex , 1); + if (compareLowerBoundaries(index, scale, collapsedUpperBoundIndex, buckets.scale()) >= 0) { + // we still have a larger zero-threshold than the largest collapsed bucket's upper boundary + return new ZeroBucket(index, scale, newZeroCount); + } else { + return new ZeroBucket(collapsedUpperBoundIndex, buckets.scale(), newZeroCount); + } } } } diff --git a/libs/exponential-histogram/src/test/java/org/elasticsearch/exponentialhistogram/ExponentialHistogramMergerTest.java b/libs/exponential-histogram/src/test/java/org/elasticsearch/exponentialhistogram/ExponentialHistogramMergerTest.java new file mode 100644 index 0000000000000..09cfa1294171e --- /dev/null +++ b/libs/exponential-histogram/src/test/java/org/elasticsearch/exponentialhistogram/ExponentialHistogramMergerTest.java @@ -0,0 +1,139 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the "Elastic License + * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side + * Public License v 1"; you may not use this file except in compliance with, at + * your election, the "Elastic License 2.0", the "GNU Affero General Public + * License v3.0 only", or the "Server Side Public License, v 1". 
+ */ + +package org.elasticsearch.exponentialhistogram; + +import org.elasticsearch.test.ESTestCase; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collections; +import java.util.List; +import java.util.Random; +import java.util.stream.Collectors; +import java.util.stream.IntStream; + +import static org.elasticsearch.exponentialhistogram.FixedSizeExponentialHistogramTest.printMidpoints; +import static org.hamcrest.Matchers.equalTo; + +public class ExponentialHistogramMergerTest extends ESTestCase { + + public void testZeroThresholdCollapsesOverlappingBuckets() { + FixedSizeExponentialHistogram first = new FixedSizeExponentialHistogram(100); + first.setZeroBucket(new ZeroBucket(2.0001, 10)); + + FixedSizeExponentialHistogram second = new FixedSizeExponentialHistogram(100); + first.resetBuckets(0); //scale 0 means base 2 + first.tryAddBucket(0, 1, false); // bucket (-2, 1] + first.tryAddBucket(1, 1, false); // bucket (-4, 2] + first.tryAddBucket(2, 7, false); // bucket (-8, 4] + first.tryAddBucket(0, 1, true); // bucket (1, 2] + first.tryAddBucket(1, 1, true); // bucket (2, 4] + first.tryAddBucket(2, 42, true); // bucket (4, 8] + + ExponentialHistogram mergeResult = mergeWithMinimumScale(100, 0, first, second); + + assertThat(mergeResult.zeroBucket().zeroThreshold(), equalTo(4.0)); + assertThat(mergeResult.zeroBucket().count(), equalTo(14L)); + + // only the (4, 8] bucket should be left + assertThat(mergeResult.scale(), equalTo(0)); + + ExponentialHistogram.BucketIterator negBuckets = mergeResult.negativeBuckets(); + assertThat(negBuckets.peekIndex(), equalTo(2L)); + assertThat(negBuckets.peekCount(), equalTo(7L)); + negBuckets.advance(); + assertThat(negBuckets.hasNext(), equalTo(false)); + + ExponentialHistogram.BucketIterator posBuckets = mergeResult.positiveBuckets(); + assertThat(posBuckets.peekIndex(), equalTo(2L)); + assertThat(posBuckets.peekCount(), equalTo(42L)); + posBuckets.advance(); + assertThat(posBuckets.hasNext(), 
equalTo(false)); + + // ensure buckets of the accumulated histogram are collapsed too if needed + FixedSizeExponentialHistogram third = new FixedSizeExponentialHistogram(100); + third.setZeroBucket(new ZeroBucket(45.0, 1)); + + mergeResult = mergeWithMinimumScale(100, 0, mergeResult, third); + assertThat(mergeResult.zeroBucket().zeroThreshold(), equalTo(45.0)); + assertThat(mergeResult.zeroBucket().count(), equalTo(1L + 14L + 42L + 7L)); + assertThat(mergeResult.positiveBuckets().hasNext(), equalTo(false)); + assertThat(mergeResult.negativeBuckets().hasNext(), equalTo(false)); + } + + public void testEmptyZeroBucketIgnored() { + FixedSizeExponentialHistogram first = new FixedSizeExponentialHistogram(100); + first.setZeroBucket(new ZeroBucket(2.0, 10)); + first.resetBuckets(0); //scale 0 means base 2 + first.tryAddBucket(2, 42L, true); // bucket (4, 8] + + FixedSizeExponentialHistogram second = new FixedSizeExponentialHistogram(100); + second.setZeroBucket(new ZeroBucket(100.0, 0)); + + ExponentialHistogram mergeResult = mergeWithMinimumScale(100, 0, first, second); + + assertThat(mergeResult.zeroBucket().zeroThreshold(), equalTo(2.0)); + assertThat(mergeResult.zeroBucket().count(), equalTo(10L)); + + ExponentialHistogram.BucketIterator posBuckets = mergeResult.positiveBuckets(); + assertThat(posBuckets.peekIndex(), equalTo(2L)); + assertThat(posBuckets.peekCount(), equalTo(42L)); + posBuckets.advance(); + assertThat(posBuckets.hasNext(), equalTo(false)); + } + + + /** + * Verify that the resulting histogram is independent of the order of elements and therefore merges performed. + */ + public void testMergeOrderIndependence() { + Random rnd = new Random(42); + + List values = IntStream.range(0, 10_000) + .mapToDouble(i -> i<17 ? 
0 : rnd.nextDouble() * Math.pow(10, rnd.nextLong()%4)) + .boxed() + .collect(Collectors.toCollection(ArrayList::new)); + + ExponentialHistogram reference = ExponentialHistogramGenerator.createFor(20, values.stream().mapToDouble(Double::doubleValue)); + + for (int i=0; i<100; i++) { + Collections.shuffle(values, rnd); + ExponentialHistogram shuffled = ExponentialHistogramGenerator.createFor(20, values.stream().mapToDouble(Double::doubleValue)); + + assertThat("Expected same scale", shuffled.scale(), equalTo(reference.scale())); + assertThat("Expected same zero-bucket", shuffled.zeroBucket(), equalTo(reference.zeroBucket())); + assertBucketsEqual(shuffled.negativeBuckets(), reference.negativeBuckets()); + assertBucketsEqual(shuffled.positiveBuckets(), reference.positiveBuckets()); + } + printMidpoints(reference); + + } + + private void assertBucketsEqual(ExponentialHistogram.BucketIterator itA, ExponentialHistogram.BucketIterator itB) { + assertThat("Expecting both set of buckets to be emptry or non-empty", itA.hasNext(), equalTo(itB.hasNext())); + while(itA.hasNext() && itB.hasNext()) { + assertThat(itA.peekIndex(), equalTo(itB.peekIndex())); + assertThat(itA.peekCount(), equalTo(itB.peekCount())); + assertThat("The number of buckets is different", itA.hasNext(), equalTo(itB.hasNext())); + itA.advance(); + itB.advance(); + } + } + + private static ExponentialHistogram mergeWithMinimumScale(int bucketCount, int scale, ExponentialHistogram... 
histograms) { + ExponentialHistogramMerger merger = new ExponentialHistogramMerger(bucketCount, scale); + Arrays.stream(histograms).forEach(merger::add); + return merger.get(); + } + + + + +} diff --git a/libs/exponential-histogram/src/test/java/org/elasticsearch/exponentialhistogram/FixedSizeExponentialHistogramTest.java b/libs/exponential-histogram/src/test/java/org/elasticsearch/exponentialhistogram/FixedSizeExponentialHistogramTest.java index 4806b230876e6..2c1db6228eecb 100644 --- a/libs/exponential-histogram/src/test/java/org/elasticsearch/exponentialhistogram/FixedSizeExponentialHistogramTest.java +++ b/libs/exponential-histogram/src/test/java/org/elasticsearch/exponentialhistogram/FixedSizeExponentialHistogramTest.java @@ -16,8 +16,8 @@ public class FixedSizeExponentialHistogramTest { @Test public void testPrintBuckets() { - ExponentialHistogram first = ExpHistoGenerator.createFor(0.01234, 42, 56789); - ExponentialHistogram second = ExpHistoGenerator.createFor(38, 50, 250, 257, 10001.1234); + ExponentialHistogram first = ExponentialHistogramGenerator.createFor(0.01234, 42, 56789); + ExponentialHistogram second = ExponentialHistogramGenerator.createFor(38, 50, 250, 257, 10001.1234); ExponentialHistogram result = ExponentialHistogramMerger.merge(7, first, second); printMidpoints(result); @@ -27,7 +27,7 @@ public void testPrintBuckets() { @Test public void testPrintBucketsLinearScale() { - ExponentialHistogram result = ExpHistoGenerator.createFor( + ExponentialHistogram result = ExponentialHistogramGenerator.createFor( 1000, IntStream.range(-1_000_000, 2_000_000).mapToDouble(Double::valueOf) ); @@ -39,7 +39,7 @@ public void testPrintBucketsLinearScale() { printMidpoints(result); } - private static void printMidpoints(ExponentialHistogram histo) { + public static void printMidpoints(ExponentialHistogram histo) { StringBuilder sb = new StringBuilder("{ base : "); sb.append(ExponentialHistogramUtils.getLowerBucketBoundary(1, histo.scale())).append(", "); 
ExponentialHistogram.BucketIterator neg = histo.negativeBuckets(); diff --git a/libs/exponential-histogram/src/test/java/org/elasticsearch/exponentialhistogram/PercentileAccuracyTest.java b/libs/exponential-histogram/src/test/java/org/elasticsearch/exponentialhistogram/PercentileAccuracyTest.java new file mode 100644 index 0000000000000..d6d019d88f226 --- /dev/null +++ b/libs/exponential-histogram/src/test/java/org/elasticsearch/exponentialhistogram/PercentileAccuracyTest.java @@ -0,0 +1,307 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the "Elastic License + * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side + * Public License v 1"; you may not use this file except in compliance with, at + * your election, the "Elastic License 2.0", the "GNU Affero General Public + * License v3.0 only", or the "Server Side Public License, v 1". + */ + +package org.elasticsearch.exponentialhistogram; + +import org.apache.commons.math3.distribution.BetaDistribution; +import org.apache.commons.math3.distribution.ExponentialDistribution; +import org.apache.commons.math3.distribution.GammaDistribution; +import org.apache.commons.math3.distribution.LogNormalDistribution; +import org.apache.commons.math3.distribution.NormalDistribution; +import org.apache.commons.math3.distribution.RealDistribution; +import org.apache.commons.math3.distribution.UniformRealDistribution; +import org.apache.commons.math3.distribution.WeibullDistribution; +import org.apache.commons.math3.random.Well19937c; +import org.apache.commons.math3.stat.descriptive.rank.Percentile; +import org.elasticsearch.test.ESTestCase; + +import java.util.Arrays; +import java.util.Random; + +import static org.hamcrest.Matchers.closeTo; +import static org.hamcrest.Matchers.lessThan; + +public class PercentileAccuracyTest extends ESTestCase { + + public static final double[] PERCENTILES_TO_TEST = { 0, 
0.01, 0.1, 0.25, 0.5, 0.75, 0.9, 0.95, 0.99, 1.0 }; + + /** + * Test percentile accuracy with uniform distribution + */ + public void testUniformDistribution() { + testDistributionPercentileAccuracy( + new UniformRealDistribution(new Well19937c(42), 0, 100), + 50000, + 500 + ); + } + + /** + * Test percentile accuracy with normal distribution + */ + public void testNormalDistribution() { + testDistributionPercentileAccuracy( + new NormalDistribution(new Well19937c(42), 100, 15), + 50000, + 500 + ); + } + + /** + * Test percentile accuracy with exponential distribution + */ + public void testExponentialDistribution() { + testDistributionPercentileAccuracy( + new ExponentialDistribution(new Well19937c(42), 10), + 50000, + 500 + ); + } + + /** + * Test percentile accuracy with log-normal distribution + */ + public void testLogNormalDistribution() { + testDistributionPercentileAccuracy( + new LogNormalDistribution(new Well19937c(42), 0, 1), + 50000, + 500 + ); + } + + /** + * Test percentile accuracy with gamma distribution + */ + public void testGammaDistribution() { + testDistributionPercentileAccuracy( + new GammaDistribution(new Well19937c(42), 2, 5), + 50000, + 500 + ); + } + + /** + * Test percentile accuracy with beta distribution + */ + public void testBetaDistribution() { + testDistributionPercentileAccuracy( + new BetaDistribution(new Well19937c(42), 2, 5), + 50000, + 500 + ); + } + + /** + * Test percentile accuracy with Weibull distribution + */ + public void testWeibullDistribution() { + testDistributionPercentileAccuracy( + new WeibullDistribution(new Well19937c(42), 2, 5), + 50000, + 500 + ); + } + + /** + * Test how bucket count affects percentile accuracy + */ + public void testBucketCountImpact() { + RealDistribution distribution = new LogNormalDistribution(new Well19937c(42), 0, 1); + int sampleSize = 50000; + double[] values = generateSamples(distribution, sampleSize); + + // Test with different bucket counts + int[] bucketCounts = { 10, 50, 100, 
200, 500 }; + for (int bucketCount : bucketCounts) { + double maxError = testPercentileAccuracy(values, bucketCount); + logger.info("Bucket count: " + bucketCount + ", Max relative error: " + maxError); + } + + // Verify that more buckets generally means better accuracy + double errorWithFewBuckets = testPercentileAccuracy(values, 20); + double errorWithManyBuckets = testPercentileAccuracy(values, 200); + assertThat("More buckets should improve accuracy", errorWithManyBuckets, lessThan(errorWithFewBuckets)); + } + + /** + * Test percentile accuracy with mixed positive and negative values + */ + public void testMixedSignValues() { + Random random = new Random(42); + double[] values = new double[10000]; + for (int i = 0; i < values.length; i++) { + values[i] = (random.nextDouble() * 200) - 100; // Range from -100 to 100 + } + + testPercentileAccuracy(values, 100); + } + + /** + * Test percentile accuracy with skewed data + */ + public void testSkewedData() { + // Create a highly skewed dataset + Random random = new Random(42); + double[] values = new double[10000]; + for (int i = 0; i < values.length; i++) { + if (random.nextDouble() < 0.9) { + // 90% of values are small + values[i] = random.nextDouble() * 10; + } else { + // 10% are very large + values[i] = random.nextDouble() * 10000 + 100; + } + } + + testPercentileAccuracy(values, 100); + } + + /** + * Test percentile accuracy with data containing zeros + */ + public void testDataWithZeros() { + Random random = new Random(42); + double[] values = new double[10000]; + for (int i = 0; i < values.length; i++) { + if (random.nextDouble() < 0.2) { + // 20% zeros + values[i] = 0; + } else { + values[i] = random.nextDouble() * 100; + } + } + + testPercentileAccuracy(values, 100); + } + + /** + * Helper method to test percentile accuracy for a given distribution + */ + private void testDistributionPercentileAccuracy(RealDistribution distribution, int sampleSize, int bucketCount) { + double[] values = 
generateSamples(distribution, sampleSize); + testPercentileAccuracy(values, bucketCount); + } + + /** + * Helper method to generate samples from a distribution + */ + private double[] generateSamples(RealDistribution distribution, int sampleSize) { + double[] values = new double[sampleSize]; + for (int i = 0; i < sampleSize; i++) { + values[i] = distribution.sample(); + } + return values; + } + + /** + * Helper method to test percentile accuracy for a given dataset + */ + private double testPercentileAccuracy(double[] values, int bucketCount) { + // Create histogram + ExponentialHistogram histogram = ExponentialHistogramGenerator.createFor(bucketCount, Arrays.stream(values)); + + // Calculate exact percentiles + Percentile exactPercentile = new Percentile(); + exactPercentile.setData(values); + + double allowedError = getMaximumRelativeError(values, bucketCount); + double maxError = 0; + + // Compare histogram percentiles with exact percentiles + for (double p : PERCENTILES_TO_TEST) { + double exactValue; + if (p == 0) { + exactValue = Arrays.stream(values).min().getAsDouble(); + } else if (p == 1) { + exactValue = Arrays.stream(values).max().getAsDouble(); + } else { + exactValue = exactPercentile.evaluate(p * 100); + } + double histoValue = ExpHistoPercentiles.getPercentile(histogram, p); + + // Skip comparison if exact value is zero to avoid division by zero + if (Math.abs(exactValue) < 1e-10) { + continue; + } + + double relativeError = Math.abs(histoValue - exactValue) / Math.abs(exactValue); + maxError = Math.max(maxError, relativeError); + + logger.info( + String.format( + "Percentile %.2f: Exact=%.6f, Histogram=%.6f, Relative Error=%.8f, Allowed Relative Error=%.8f", + p, + exactValue, + histoValue, + relativeError, + allowedError + ) + ); + + assertThat( + String.format("Percentile %.2f should be accurate within %.6f%% relative error", p, allowedError * 100), + histoValue, + closeTo(exactValue, Math.abs(exactValue * allowedError)) + ); + + } + return 
maxError; + } + + /** + * Provides the upper bound of the relative error for any percentile estimate performed with the exponential histogram. + * The error depends on the raw values put into the histogram and the number of buckets allowed. + * This is an implementation of the error bound computation proven by Theorem 3 in the UDDSketch paper + */ + private double getMaximumRelativeError(double[] values, int bucketCount) { + double smallestAbsNegative = Double.MAX_VALUE; + double largestAbsNegative = 0; + double smallestPositive = Double.MAX_VALUE; + double largestPositive = 0; + + for (double value : values) { + if (value < 0) { + smallestAbsNegative = Math.min(-value, smallestAbsNegative); + largestAbsNegative = Math.max(-value, largestAbsNegative); + } else if (value > 0) { + smallestPositive = Math.min(value, smallestPositive); + largestPositive = Math.max(value, largestPositive); + } + } + + // Our algorithm is designed to optimally distribute the bucket budget across the positive and negative range + // therefore we simply try all variations here and assume the smallest possible error + + if (largestAbsNegative == 0) { + // only positive values + double gammaSquare = Math.pow(largestPositive / smallestPositive, 2.0 / (bucketCount)); + return (gammaSquare - 1) / (gammaSquare + 1); + } else if (largestAbsNegative == 0) { + // only negative values + double gammaSquare = Math.pow(largestAbsNegative / smallestAbsNegative, 2.0 / (bucketCount)); + return (gammaSquare - 1) / (gammaSquare + 1); + } else { + double smallestError = Double.MAX_VALUE; + for (int positiveBuckets = 1; positiveBuckets < bucketCount - 1; positiveBuckets++) { + int negativeBuckets = bucketCount - positiveBuckets; + + double gammaSquareNeg = Math.pow(largestAbsNegative / smallestAbsNegative, 2.0 / (negativeBuckets)); + double errorNeg = (gammaSquareNeg - 1) / (gammaSquareNeg + 1); + + double gammaSquarePos = Math.pow(largestAbsNegative / smallestAbsNegative, 2.0 / (positiveBuckets)); + double 
errorPos = (gammaSquarePos - 1) / (gammaSquarePos + 1); + + double error = Math.max(errorNeg, errorPos); + smallestError = Math.min(smallestError, error); + } + return smallestError; + } + } + +} From cef3b117e29f60139891c64ac3ea7146acd4a755 Mon Sep 17 00:00:00 2001 From: Jonas Kunz Date: Wed, 9 Jul 2025 13:01:28 +0200 Subject: [PATCH 05/32] Stash benchmark changes --- benchmarks/build.gradle | 1 + .../ExponentialHistogramGenerationBench.java | 101 ++++++++++++++++++ .../FixedSizeExponentialHistogram.java | 2 +- .../MergingBucketIterator.java | 2 +- .../ScaleAdjustingBucketIterator.java | 2 +- 5 files changed, 105 insertions(+), 3 deletions(-) create mode 100644 benchmarks/src/main/java/org/elasticsearch/benchmark/exponentialhistogram/ExponentialHistogramGenerationBench.java diff --git a/benchmarks/build.gradle b/benchmarks/build.gradle index de5f7a7ab581f..96727eb3a2670 100644 --- a/benchmarks/build.gradle +++ b/benchmarks/build.gradle @@ -49,6 +49,7 @@ dependencies { api(project(':x-pack:plugin:esql:compute')) implementation project(path: ':libs:native') implementation project(path: ':libs:simdvec') + implementation project(path: ':libs:exponential-histogram') expression(project(path: ':modules:lang-expression', configuration: 'zip')) painless(project(path: ':modules:lang-painless', configuration: 'zip')) nativeLib(project(':libs:native')) diff --git a/benchmarks/src/main/java/org/elasticsearch/benchmark/exponentialhistogram/ExponentialHistogramGenerationBench.java b/benchmarks/src/main/java/org/elasticsearch/benchmark/exponentialhistogram/ExponentialHistogramGenerationBench.java new file mode 100644 index 0000000000000..79cc46c859941 --- /dev/null +++ b/benchmarks/src/main/java/org/elasticsearch/benchmark/exponentialhistogram/ExponentialHistogramGenerationBench.java @@ -0,0 +1,101 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. 
Licensed under the "Elastic License + * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side + * Public License v 1"; you may not use this file except in compliance with, at + * your election, the "Elastic License 2.0", the "GNU Affero General Public + * License v3.0 only", or the "Server Side Public License, v 1". + */ + +package org.elasticsearch.benchmark.exponentialhistogram; + + +import org.elasticsearch.exponentialhistogram.ExponentialHistogramGenerator; +import org.openjdk.jmh.annotations.Benchmark; +import org.openjdk.jmh.annotations.BenchmarkMode; +import org.openjdk.jmh.annotations.Fork; +import org.openjdk.jmh.annotations.Measurement; +import org.openjdk.jmh.annotations.Mode; +import org.openjdk.jmh.annotations.OutputTimeUnit; +import org.openjdk.jmh.annotations.Param; +import org.openjdk.jmh.annotations.Scope; +import org.openjdk.jmh.annotations.Setup; +import org.openjdk.jmh.annotations.State; +import org.openjdk.jmh.annotations.Threads; +import org.openjdk.jmh.annotations.Warmup; +import org.openjdk.jmh.profile.GCProfiler; +import org.openjdk.jmh.profile.StackProfiler; +import org.openjdk.jmh.runner.Runner; +import org.openjdk.jmh.runner.RunnerException; +import org.openjdk.jmh.runner.options.Options; +import org.openjdk.jmh.runner.options.OptionsBuilder; + +import java.util.Random; +import java.util.concurrent.ThreadLocalRandom; +import java.util.concurrent.TimeUnit; +import java.util.function.Supplier; + +@BenchmarkMode(Mode.AverageTime) +@OutputTimeUnit(TimeUnit.NANOSECONDS) +@Warmup(iterations = 3, time = 3, timeUnit = TimeUnit.SECONDS) +@Measurement(iterations = 5, time = 2, timeUnit = TimeUnit.SECONDS) +@Fork(1) +@Threads(1) +@State(Scope.Thread) +public class ExponentialHistogramGenerationBench { + + + @Param({ "100", "500" , "1000", "5000"}) + int bucketCount; + + @Param({ "NORMAL", "GAUSSIAN" }) + String distribution; + + Random random; + ExponentialHistogramGenerator histoGenerator; + + double[] data = new 
double[1000000]; + + @Setup + public void setUp() { + random = ThreadLocalRandom.current(); + histoGenerator = new ExponentialHistogramGenerator(bucketCount); + + Supplier nextRandom = () -> distribution.equals("GAUSSIAN") ? random.nextGaussian() : random.nextDouble(); + + //TODO: why is this here for T-DIGEST? + for (int i = 0; i < 10000; ++i) { + histoGenerator.add(nextRandom.get()); + } + + for (int i = 0; i < data.length; ++i) { + data[i] = nextRandom.get(); + } + } + + @State(Scope.Thread) + public static class ThreadState { + int index = 0; + } + + @Benchmark + @BenchmarkMode(Mode.AverageTime) + @OutputTimeUnit(TimeUnit.MICROSECONDS) + public void add(ThreadState state) { + if (state.index >= data.length) { + state.index = 0; + } + histoGenerator.add(data[state.index++]); + } + + public static void main(String[] args) throws RunnerException { + Options opt = new OptionsBuilder().include(".*" + ExponentialHistogramGenerationBench.class.getSimpleName() + ".*") + .warmupIterations(5) + .measurementIterations(5) + .addProfiler(GCProfiler.class) + .addProfiler(StackProfiler.class) + .build(); + + new Runner(opt).run(); + } +} diff --git a/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/FixedSizeExponentialHistogram.java b/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/FixedSizeExponentialHistogram.java index 13cb5aa5169a4..56c2ce4741f91 100644 --- a/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/FixedSizeExponentialHistogram.java +++ b/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/FixedSizeExponentialHistogram.java @@ -9,7 +9,7 @@ import java.util.Arrays; -public class FixedSizeExponentialHistogram implements ExponentialHistogramBuilder, ExponentialHistogram { +public final class FixedSizeExponentialHistogram implements ExponentialHistogramBuilder, ExponentialHistogram { // scale of 52 is the largest scale being able to represent the 
smallest and largest double numbers // while giving a relative error less diff --git a/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/MergingBucketIterator.java b/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/MergingBucketIterator.java index 7c6317172b596..76d78736cb364 100644 --- a/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/MergingBucketIterator.java +++ b/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/MergingBucketIterator.java @@ -7,7 +7,7 @@ package org.elasticsearch.exponentialhistogram; -public class MergingBucketIterator implements ExponentialHistogram.BucketIterator { +public final class MergingBucketIterator implements ExponentialHistogram.BucketIterator { private final ExponentialHistogram.BucketIterator itA; private final ExponentialHistogram.BucketIterator itB; diff --git a/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ScaleAdjustingBucketIterator.java b/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ScaleAdjustingBucketIterator.java index 4db8f960ad2f3..d7894466a385c 100644 --- a/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ScaleAdjustingBucketIterator.java +++ b/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ScaleAdjustingBucketIterator.java @@ -9,7 +9,7 @@ import static org.elasticsearch.exponentialhistogram.ExponentialHistogramUtils.adjustScale; -public class ScaleAdjustingBucketIterator implements ExponentialHistogram.BucketIterator { +public final class ScaleAdjustingBucketIterator implements ExponentialHistogram.BucketIterator { private final ExponentialHistogram.BucketIterator delegate; private final int scaleAdjustment; From 6a2b62fcc778b0000ce33eeddfb452f4dcb3ff2c Mon Sep 17 00:00:00 2001 From: Jonas Kunz Date: Thu, 10 Jul 2025 09:12:01 +0200 Subject: [PATCH 06/32] spotless, 
checkstyle --- .../ExponentialHistogramGenerationBench.java | 6 ++---- .../exponentialhistogram/ExponentialHistogramMerger.java | 7 ++++++- .../FixedSizeExponentialHistogram.java | 2 -- 3 files changed, 8 insertions(+), 7 deletions(-) diff --git a/benchmarks/src/main/java/org/elasticsearch/benchmark/exponentialhistogram/ExponentialHistogramGenerationBench.java b/benchmarks/src/main/java/org/elasticsearch/benchmark/exponentialhistogram/ExponentialHistogramGenerationBench.java index 79cc46c859941..5aaf81f64c6f4 100644 --- a/benchmarks/src/main/java/org/elasticsearch/benchmark/exponentialhistogram/ExponentialHistogramGenerationBench.java +++ b/benchmarks/src/main/java/org/elasticsearch/benchmark/exponentialhistogram/ExponentialHistogramGenerationBench.java @@ -9,7 +9,6 @@ package org.elasticsearch.benchmark.exponentialhistogram; - import org.elasticsearch.exponentialhistogram.ExponentialHistogramGenerator; import org.openjdk.jmh.annotations.Benchmark; import org.openjdk.jmh.annotations.BenchmarkMode; @@ -44,8 +43,7 @@ @State(Scope.Thread) public class ExponentialHistogramGenerationBench { - - @Param({ "100", "500" , "1000", "5000"}) + @Param({ "100", "500", "1000", "5000" }) int bucketCount; @Param({ "NORMAL", "GAUSSIAN" }) @@ -63,7 +61,7 @@ public void setUp() { Supplier nextRandom = () -> distribution.equals("GAUSSIAN") ? random.nextGaussian() : random.nextDouble(); - //TODO: why is this here for T-DIGEST? + // TODO: why is this here for T-DIGEST? 
for (int i = 0; i < 10000; ++i) { histoGenerator.add(nextRandom.get()); } diff --git a/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ExponentialHistogramMerger.java b/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ExponentialHistogramMerger.java index b5cb8da1e4fa6..63589ca8a0755 100644 --- a/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ExponentialHistogramMerger.java +++ b/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ExponentialHistogramMerger.java @@ -121,7 +121,12 @@ private static void merge(ExponentialHistogramBuilder output, ExponentialHistogr } } - private static int putBuckets(ExponentialHistogramBuilder output, ExponentialHistogram.BucketIterator buckets, boolean isPositive, @Nullable DownscaleStats downscaleStats) { + private static int putBuckets( + ExponentialHistogramBuilder output, + ExponentialHistogram.BucketIterator buckets, + boolean isPositive, + @Nullable DownscaleStats downscaleStats + ) { boolean collectDownScaleStatsOnNext = false; long prevIndex = 0; int overflowCount = 0; diff --git a/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/FixedSizeExponentialHistogram.java b/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/FixedSizeExponentialHistogram.java index 56c2ce4741f91..e3b3d8ae40916 100644 --- a/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/FixedSizeExponentialHistogram.java +++ b/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/FixedSizeExponentialHistogram.java @@ -7,8 +7,6 @@ package org.elasticsearch.exponentialhistogram; -import java.util.Arrays; - public final class FixedSizeExponentialHistogram implements ExponentialHistogramBuilder, ExponentialHistogram { // scale of 52 is the largest scale being able to represent the smallest and largest double numbers From 
eb955cd09c12e4a443bd751f2611aedd97367010 Mon Sep 17 00:00:00 2001 From: Jonas Kunz Date: Thu, 10 Jul 2025 09:40:01 +0200 Subject: [PATCH 07/32] more build fixes --- libs/exponential-histogram/build.gradle | 3 +-- .../ExponentialHistogramMerger.java | 4 +--- .../exponentialhistogram/DownscaleStatsTest.java | 13 +++++-------- .../ExponentialHistogramUtilsTest.java | 12 +++--------- .../FixedSizeExponentialHistogramTest.java | 6 ++---- 5 files changed, 12 insertions(+), 26 deletions(-) diff --git a/libs/exponential-histogram/build.gradle b/libs/exponential-histogram/build.gradle index 72cb0a1ba6261..87243ed2f6536 100644 --- a/libs/exponential-histogram/build.gradle +++ b/libs/exponential-histogram/build.gradle @@ -9,10 +9,9 @@ // TODO: publish this when ready? //apply plugin: 'elasticsearch.publish' +apply plugin: 'elasticsearch.build' dependencies { - compileOnly project(':libs:core') //just for @Nullable annotations - testImplementation(project(":test:framework")) testImplementation('org.apache.commons:commons-math3:3.6.1') } diff --git a/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ExponentialHistogramMerger.java b/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ExponentialHistogramMerger.java index 63589ca8a0755..eba77dd6eea68 100644 --- a/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ExponentialHistogramMerger.java +++ b/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ExponentialHistogramMerger.java @@ -7,8 +7,6 @@ package org.elasticsearch.exponentialhistogram; -import org.elasticsearch.core.Nullable; - import java.util.Arrays; import java.util.stream.Stream; @@ -125,7 +123,7 @@ private static int putBuckets( ExponentialHistogramBuilder output, ExponentialHistogram.BucketIterator buckets, boolean isPositive, - @Nullable DownscaleStats downscaleStats + DownscaleStats downscaleStats ) { boolean collectDownScaleStatsOnNext = false; 
long prevIndex = 0; diff --git a/libs/exponential-histogram/src/test/java/org/elasticsearch/exponentialhistogram/DownscaleStatsTest.java b/libs/exponential-histogram/src/test/java/org/elasticsearch/exponentialhistogram/DownscaleStatsTest.java index 15817272ab00f..bf465eff380d2 100644 --- a/libs/exponential-histogram/src/test/java/org/elasticsearch/exponentialhistogram/DownscaleStatsTest.java +++ b/libs/exponential-histogram/src/test/java/org/elasticsearch/exponentialhistogram/DownscaleStatsTest.java @@ -7,7 +7,7 @@ package org.elasticsearch.exponentialhistogram; -import org.junit.Test; +import org.elasticsearch.test.ESTestCase; import java.util.ArrayList; import java.util.Collection; @@ -20,21 +20,18 @@ import static org.hamcrest.MatcherAssert.assertThat; import static org.hamcrest.Matchers.equalTo; -public class DownscaleStatsTest { +public class DownscaleStatsTest extends ESTestCase { - @Test - public void exponential() { + public void testExponential() { long[] values = IntStream.range(0, 100).mapToLong(i -> (long) Math.min(Integer.MAX_VALUE, Math.pow(1.1, i))).distinct().toArray(); verifyFor(values); } - @Test - public void numericalLimits() { + public void testNumericalLimits() { verifyFor(Long.MIN_VALUE, Long.MAX_VALUE); } - @Test - public void random() { + public void testRandom() { Random rnd = new Random(42); for (int i = 0; i < 100; i++) { diff --git a/libs/exponential-histogram/src/test/java/org/elasticsearch/exponentialhistogram/ExponentialHistogramUtilsTest.java b/libs/exponential-histogram/src/test/java/org/elasticsearch/exponentialhistogram/ExponentialHistogramUtilsTest.java index 9fee2e91b7fb6..3f0f0d0f1b028 100644 --- a/libs/exponential-histogram/src/test/java/org/elasticsearch/exponentialhistogram/ExponentialHistogramUtilsTest.java +++ b/libs/exponential-histogram/src/test/java/org/elasticsearch/exponentialhistogram/ExponentialHistogramUtilsTest.java @@ -7,7 +7,7 @@ package org.elasticsearch.exponentialhistogram; -import org.junit.Test; +import 
org.elasticsearch.test.ESTestCase; import java.util.Random; @@ -21,9 +21,8 @@ import static org.hamcrest.Matchers.lessThanOrEqualTo; import static org.junit.Assert.assertThrows; -public class ExponentialHistogramUtilsTest { +public class ExponentialHistogramUtilsTest extends ESTestCase { - @Test public void testMaxValue() { assertThat(getMaximumScaleIncrease(Long.MAX_VALUE), equalTo(0)); assertThat(getMaximumScaleIncrease(Long.MAX_VALUE >> 1), equalTo(1)); @@ -34,7 +33,6 @@ public void testMaxValue() { assertThat(adjustScale(Long.MAX_VALUE >> 4, 4), equalTo((Long.MAX_VALUE & ~15) + 6)); } - @Test public void testMinValue() { assertThat(getMaximumScaleIncrease(Long.MIN_VALUE), equalTo(0)); assertThat(getMaximumScaleIncrease(Long.MIN_VALUE >> 1), equalTo(1)); @@ -45,7 +43,6 @@ public void testMinValue() { assertThat(adjustScale(Long.MIN_VALUE >> 4, 4), equalTo((Long.MIN_VALUE & ~15) + 6)); } - @Test public void testRandom() { Random rnd = new Random(42); @@ -60,8 +57,7 @@ public void testRandom() { } - @Test - public void randomComparison() { + public void testRandomComparison() { Random rnd = new Random(42); for (int i=0; i<100_000; i++) { @@ -91,7 +87,6 @@ public void randomComparison() { } - @Test public void testScalingUpToMidpoint() { long midpointIndex = adjustScale(0, 64); double lowerBoundary = getLowerBucketBoundary(midpointIndex, 64); @@ -102,7 +97,6 @@ public void testScalingUpToMidpoint() { assertThat(upperBoundary, equalTo(4.0/3.0)); } - @Test public void testSaneBucketBoundaries() { assertThat(getLowerBucketBoundary(0, 42), equalTo(1.0)); assertThat(getLowerBucketBoundary(1, 0), equalTo(2.0)); diff --git a/libs/exponential-histogram/src/test/java/org/elasticsearch/exponentialhistogram/FixedSizeExponentialHistogramTest.java b/libs/exponential-histogram/src/test/java/org/elasticsearch/exponentialhistogram/FixedSizeExponentialHistogramTest.java index 2c1db6228eecb..d5be273f4baec 100644 --- 
a/libs/exponential-histogram/src/test/java/org/elasticsearch/exponentialhistogram/FixedSizeExponentialHistogramTest.java +++ b/libs/exponential-histogram/src/test/java/org/elasticsearch/exponentialhistogram/FixedSizeExponentialHistogramTest.java @@ -7,14 +7,13 @@ package org.elasticsearch.exponentialhistogram; -import org.junit.Test; +import org.elasticsearch.test.ESTestCase; import java.util.stream.IntStream; -public class FixedSizeExponentialHistogramTest { +public class FixedSizeExponentialHistogramTest extends ESTestCase { - @Test public void testPrintBuckets() { ExponentialHistogram first = ExponentialHistogramGenerator.createFor(0.01234, 42, 56789); ExponentialHistogram second = ExponentialHistogramGenerator.createFor(38, 50, 250, 257, 10001.1234); @@ -24,7 +23,6 @@ public void testPrintBuckets() { } - @Test public void testPrintBucketsLinearScale() { ExponentialHistogram result = ExponentialHistogramGenerator.createFor( From 2eb5fdd40ce923ae2edf62e9bc311a847adff7c6 Mon Sep 17 00:00:00 2001 From: Jonas Kunz Date: Thu, 10 Jul 2025 09:47:26 +0200 Subject: [PATCH 08/32] Fix license headers --- .../exponentialhistogram/DownscaleStats.java | 8 +++++--- .../exponentialhistogram/ExpHistoPercentiles.java | 8 +++++--- .../exponentialhistogram/ExponentialHistogram.java | 8 +++++--- .../ExponentialHistogramBuilder.java | 8 +++++--- .../ExponentialHistogramMerger.java | 8 +++++--- .../ExponentialHistogramUtils.java | 8 +++++--- .../FixedSizeExponentialHistogram.java | 8 +++++--- .../exponentialhistogram/MergingBucketIterator.java | 8 +++++--- .../ScaleAdjustingBucketIterator.java | 8 +++++--- .../elasticsearch/exponentialhistogram/ZeroBucket.java | 8 +++++--- .../exponentialhistogram/DownscaleStatsTest.java | 8 +++++--- .../ExponentialHistogramUtilsTest.java | 10 +++++----- .../FixedSizeExponentialHistogramTest.java | 8 +++++--- 13 files changed, 65 insertions(+), 41 deletions(-) diff --git 
a/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/DownscaleStats.java b/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/DownscaleStats.java index bb2de85526506..0681e11535303 100644 --- a/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/DownscaleStats.java +++ b/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/DownscaleStats.java @@ -1,8 +1,10 @@ /* * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one - * or more contributor license agreements. Licensed under the Elastic License - * 2.0; you may not use this file except in compliance with the Elastic License - * 2.0. + * or more contributor license agreements. Licensed under the "Elastic License + * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side + * Public License v 1"; you may not use this file except in compliance with, at + * your election, the "Elastic License 2.0", the "GNU Affero General Public + * License v3.0 only", or the "Server Side Public License, v 1". */ package org.elasticsearch.exponentialhistogram; diff --git a/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ExpHistoPercentiles.java b/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ExpHistoPercentiles.java index 7a9855ebea51c..956fdaf97468c 100644 --- a/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ExpHistoPercentiles.java +++ b/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ExpHistoPercentiles.java @@ -1,8 +1,10 @@ /* * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one - * or more contributor license agreements. Licensed under the Elastic License - * 2.0; you may not use this file except in compliance with the Elastic License - * 2.0. + * or more contributor license agreements. 
Licensed under the "Elastic License + * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side + * Public License v 1"; you may not use this file except in compliance with, at + * your election, the "Elastic License 2.0", the "GNU Affero General Public + * License v3.0 only", or the "Server Side Public License, v 1". */ package org.elasticsearch.exponentialhistogram; diff --git a/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ExponentialHistogram.java b/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ExponentialHistogram.java index 20fffac06b173..fdb649ca32995 100644 --- a/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ExponentialHistogram.java +++ b/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ExponentialHistogram.java @@ -1,8 +1,10 @@ /* * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one - * or more contributor license agreements. Licensed under the Elastic License - * 2.0; you may not use this file except in compliance with the Elastic License - * 2.0. + * or more contributor license agreements. Licensed under the "Elastic License + * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side + * Public License v 1"; you may not use this file except in compliance with, at + * your election, the "Elastic License 2.0", the "GNU Affero General Public + * License v3.0 only", or the "Server Side Public License, v 1". 
*/ package org.elasticsearch.exponentialhistogram; diff --git a/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ExponentialHistogramBuilder.java b/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ExponentialHistogramBuilder.java index 444ee984decad..17c49486cb59e 100644 --- a/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ExponentialHistogramBuilder.java +++ b/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ExponentialHistogramBuilder.java @@ -1,8 +1,10 @@ /* * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one - * or more contributor license agreements. Licensed under the Elastic License - * 2.0; you may not use this file except in compliance with the Elastic License - * 2.0. + * or more contributor license agreements. Licensed under the "Elastic License + * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side + * Public License v 1"; you may not use this file except in compliance with, at + * your election, the "Elastic License 2.0", the "GNU Affero General Public + * License v3.0 only", or the "Server Side Public License, v 1". */ package org.elasticsearch.exponentialhistogram; diff --git a/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ExponentialHistogramMerger.java b/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ExponentialHistogramMerger.java index eba77dd6eea68..33ac538aadcbd 100644 --- a/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ExponentialHistogramMerger.java +++ b/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ExponentialHistogramMerger.java @@ -1,8 +1,10 @@ /* * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one - * or more contributor license agreements. 
Licensed under the Elastic License - * 2.0; you may not use this file except in compliance with the Elastic License - * 2.0. + * or more contributor license agreements. Licensed under the "Elastic License + * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side + * Public License v 1"; you may not use this file except in compliance with, at + * your election, the "Elastic License 2.0", the "GNU Affero General Public + * License v3.0 only", or the "Server Side Public License, v 1". */ package org.elasticsearch.exponentialhistogram; diff --git a/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ExponentialHistogramUtils.java b/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ExponentialHistogramUtils.java index 3477ae18877a4..01a18f90d1e24 100644 --- a/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ExponentialHistogramUtils.java +++ b/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ExponentialHistogramUtils.java @@ -1,8 +1,10 @@ /* * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one - * or more contributor license agreements. Licensed under the Elastic License - * 2.0; you may not use this file except in compliance with the Elastic License - * 2.0. + * or more contributor license agreements. Licensed under the "Elastic License + * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side + * Public License v 1"; you may not use this file except in compliance with, at + * your election, the "Elastic License 2.0", the "GNU Affero General Public + * License v3.0 only", or the "Server Side Public License, v 1". 
*/ package org.elasticsearch.exponentialhistogram; diff --git a/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/FixedSizeExponentialHistogram.java b/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/FixedSizeExponentialHistogram.java index e3b3d8ae40916..2b36b6d2d1412 100644 --- a/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/FixedSizeExponentialHistogram.java +++ b/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/FixedSizeExponentialHistogram.java @@ -1,8 +1,10 @@ /* * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one - * or more contributor license agreements. Licensed under the Elastic License - * 2.0; you may not use this file except in compliance with the Elastic License - * 2.0. + * or more contributor license agreements. Licensed under the "Elastic License + * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side + * Public License v 1"; you may not use this file except in compliance with, at + * your election, the "Elastic License 2.0", the "GNU Affero General Public + * License v3.0 only", or the "Server Side Public License, v 1". */ package org.elasticsearch.exponentialhistogram; diff --git a/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/MergingBucketIterator.java b/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/MergingBucketIterator.java index 76d78736cb364..37ff9201e556a 100644 --- a/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/MergingBucketIterator.java +++ b/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/MergingBucketIterator.java @@ -1,8 +1,10 @@ /* * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one - * or more contributor license agreements. 
Licensed under the Elastic License - * 2.0; you may not use this file except in compliance with the Elastic License - * 2.0. + * or more contributor license agreements. Licensed under the "Elastic License + * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side + * Public License v 1"; you may not use this file except in compliance with, at + * your election, the "Elastic License 2.0", the "GNU Affero General Public + * License v3.0 only", or the "Server Side Public License, v 1". */ package org.elasticsearch.exponentialhistogram; diff --git a/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ScaleAdjustingBucketIterator.java b/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ScaleAdjustingBucketIterator.java index d7894466a385c..90f7187d68402 100644 --- a/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ScaleAdjustingBucketIterator.java +++ b/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ScaleAdjustingBucketIterator.java @@ -1,8 +1,10 @@ /* * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one - * or more contributor license agreements. Licensed under the Elastic License - * 2.0; you may not use this file except in compliance with the Elastic License - * 2.0. + * or more contributor license agreements. Licensed under the "Elastic License + * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side + * Public License v 1"; you may not use this file except in compliance with, at + * your election, the "Elastic License 2.0", the "GNU Affero General Public + * License v3.0 only", or the "Server Side Public License, v 1". 
*/ package org.elasticsearch.exponentialhistogram; diff --git a/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ZeroBucket.java b/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ZeroBucket.java index d1d370e19a8d1..ca9e514549cd8 100644 --- a/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ZeroBucket.java +++ b/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ZeroBucket.java @@ -1,8 +1,10 @@ /* * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one - * or more contributor license agreements. Licensed under the Elastic License - * 2.0; you may not use this file except in compliance with the Elastic License - * 2.0. + * or more contributor license agreements. Licensed under the "Elastic License + * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side + * Public License v 1"; you may not use this file except in compliance with, at + * your election, the "Elastic License 2.0", the "GNU Affero General Public + * License v3.0 only", or the "Server Side Public License, v 1". */ package org.elasticsearch.exponentialhistogram; diff --git a/libs/exponential-histogram/src/test/java/org/elasticsearch/exponentialhistogram/DownscaleStatsTest.java b/libs/exponential-histogram/src/test/java/org/elasticsearch/exponentialhistogram/DownscaleStatsTest.java index bf465eff380d2..82446ccdf2b09 100644 --- a/libs/exponential-histogram/src/test/java/org/elasticsearch/exponentialhistogram/DownscaleStatsTest.java +++ b/libs/exponential-histogram/src/test/java/org/elasticsearch/exponentialhistogram/DownscaleStatsTest.java @@ -1,8 +1,10 @@ /* * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one - * or more contributor license agreements. Licensed under the Elastic License - * 2.0; you may not use this file except in compliance with the Elastic License - * 2.0. 
+ * or more contributor license agreements. Licensed under the "Elastic License + * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side + * Public License v 1"; you may not use this file except in compliance with, at + * your election, the "Elastic License 2.0", the "GNU Affero General Public + * License v3.0 only", or the "Server Side Public License, v 1". */ package org.elasticsearch.exponentialhistogram; diff --git a/libs/exponential-histogram/src/test/java/org/elasticsearch/exponentialhistogram/ExponentialHistogramUtilsTest.java b/libs/exponential-histogram/src/test/java/org/elasticsearch/exponentialhistogram/ExponentialHistogramUtilsTest.java index 3f0f0d0f1b028..394a4aee10d06 100644 --- a/libs/exponential-histogram/src/test/java/org/elasticsearch/exponentialhistogram/ExponentialHistogramUtilsTest.java +++ b/libs/exponential-histogram/src/test/java/org/elasticsearch/exponentialhistogram/ExponentialHistogramUtilsTest.java @@ -1,8 +1,10 @@ /* * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one - * or more contributor license agreements. Licensed under the Elastic License - * 2.0; you may not use this file except in compliance with the Elastic License - * 2.0. + * or more contributor license agreements. Licensed under the "Elastic License + * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side + * Public License v 1"; you may not use this file except in compliance with, at + * your election, the "Elastic License 2.0", the "GNU Affero General Public + * License v3.0 only", or the "Server Side Public License, v 1". 
*/ package org.elasticsearch.exponentialhistogram; @@ -16,10 +18,8 @@ import static org.elasticsearch.exponentialhistogram.ExponentialHistogramUtils.getLowerBucketBoundary; import static org.elasticsearch.exponentialhistogram.ExponentialHistogramUtils.getMaximumScaleIncrease; import static org.elasticsearch.exponentialhistogram.ExponentialHistogramUtils.getUpperBucketBoundary; -import static org.hamcrest.MatcherAssert.assertThat; import static org.hamcrest.Matchers.equalTo; import static org.hamcrest.Matchers.lessThanOrEqualTo; -import static org.junit.Assert.assertThrows; public class ExponentialHistogramUtilsTest extends ESTestCase { diff --git a/libs/exponential-histogram/src/test/java/org/elasticsearch/exponentialhistogram/FixedSizeExponentialHistogramTest.java b/libs/exponential-histogram/src/test/java/org/elasticsearch/exponentialhistogram/FixedSizeExponentialHistogramTest.java index d5be273f4baec..81e9f75b35643 100644 --- a/libs/exponential-histogram/src/test/java/org/elasticsearch/exponentialhistogram/FixedSizeExponentialHistogramTest.java +++ b/libs/exponential-histogram/src/test/java/org/elasticsearch/exponentialhistogram/FixedSizeExponentialHistogramTest.java @@ -1,8 +1,10 @@ /* * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one - * or more contributor license agreements. Licensed under the Elastic License - * 2.0; you may not use this file except in compliance with the Elastic License - * 2.0. + * or more contributor license agreements. Licensed under the "Elastic License + * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side + * Public License v 1"; you may not use this file except in compliance with, at + * your election, the "Elastic License 2.0", the "GNU Affero General Public + * License v3.0 only", or the "Server Side Public License, v 1". 
*/ package org.elasticsearch.exponentialhistogram; From fd7064e040d30c7284ca153394f3e93e700f97c3 Mon Sep 17 00:00:00 2001 From: Jonas Kunz Date: Thu, 10 Jul 2025 09:56:14 +0200 Subject: [PATCH 09/32] spotless round 2 --- .../exponentialhistogram/DownscaleStats.java | 5 +-- .../ExpHistoPercentiles.java | 3 +- .../ExponentialHistogram.java | 6 +++ .../ExponentialHistogramBuilder.java | 2 + .../ExponentialHistogramGenerator.java | 9 ++-- .../ExponentialHistogramMerger.java | 3 +- .../ExponentialHistogramUtils.java | 45 +++++++++---------- .../MergingBucketIterator.java | 3 +- .../exponentialhistogram/ZeroBucket.java | 8 ++-- .../ExponentialHistogramMergerTest.java | 16 +++---- .../ExponentialHistogramUtilsTest.java | 17 +++---- .../FixedSizeExponentialHistogramTest.java | 2 - .../PercentileAccuracyTest.java | 42 +++-------------- 13 files changed, 63 insertions(+), 98 deletions(-) diff --git a/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/DownscaleStats.java b/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/DownscaleStats.java index 0681e11535303..d2748d85fb7f4 100644 --- a/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/DownscaleStats.java +++ b/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/DownscaleStats.java @@ -15,7 +15,6 @@ public class DownscaleStats { // collapsed buckets when increasing the scale by (i+1) instead of (i) int[] collapsedCount = new int[63]; - void add(long previousBucketIndex, long currentBucketIndex) { if (currentBucketIndex <= previousBucketIndex) { throw new IllegalArgumentException("currentBucketIndex must be bigger than previousBucketIndex"); @@ -36,7 +35,7 @@ void add(long previousBucketIndex, long currentBucketIndex) { return; } int requiredScaleChange = 64 - numEqualLeadingBits; - collapsedCount[requiredScaleChange-1]++; + collapsedCount[requiredScaleChange - 1]++; } int 
getCollapsedBucketCountAfterScaleReduction(int reduction) { @@ -55,7 +54,7 @@ public int getRequiredScaleReductionToReduceBucketCountBy(int desiredReduction) for (int i = 0; i < collapsedCount.length; i++) { totalCollapsed += collapsedCount[i]; if (totalCollapsed >= desiredReduction) { - return i+1; + return i + 1; } } throw new IllegalArgumentException("it is not possible to reduce the bucket count by " + desiredReduction); diff --git a/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ExpHistoPercentiles.java b/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ExpHistoPercentiles.java index 956fdaf97468c..24e7fc79db00c 100644 --- a/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ExpHistoPercentiles.java +++ b/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ExpHistoPercentiles.java @@ -11,7 +11,6 @@ public class ExpHistoPercentiles { - public static double getPercentile(ExponentialHistogram histo, double percentile) { if (percentile < 0 || percentile > 1) { throw new IllegalArgumentException("percentile must be in range [0, 1]"); @@ -40,7 +39,7 @@ public static double getPercentile(ExponentialHistogram histo, double percentile private static double getBucketMidpointForRank(ExponentialHistogram.BucketIterator buckets, long rank) { long seenCount = 0; while (buckets.hasNext()) { - seenCount+= buckets.peekCount(); + seenCount += buckets.peekCount(); if (rank < seenCount) { return ExponentialHistogramUtils.getPointOfLeastRelativeError(buckets.peekIndex(), buckets.scale()); } diff --git a/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ExponentialHistogram.java b/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ExponentialHistogram.java index fdb649ca32995..49ee0af339399 100644 --- 
a/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ExponentialHistogram.java +++ b/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ExponentialHistogram.java @@ -16,6 +16,7 @@ public interface ExponentialHistogram { ZeroBucket zeroBucket(); BucketIterator positiveBuckets(); + BucketIterator negativeBuckets(); /** @@ -29,10 +30,15 @@ public interface ExponentialHistogram { */ interface BucketIterator { boolean hasNext(); + long peekCount(); + long peekIndex(); + void advance(); + int scale(); + BucketIterator copy(); } diff --git a/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ExponentialHistogramBuilder.java b/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ExponentialHistogramBuilder.java index 17c49486cb59e..af2dbcc86bfdc 100644 --- a/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ExponentialHistogramBuilder.java +++ b/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ExponentialHistogramBuilder.java @@ -12,6 +12,8 @@ public interface ExponentialHistogramBuilder { void setZeroBucket(ZeroBucket zeroBucket); + boolean tryAddBucket(long index, long count, boolean isPositive); + void resetBuckets(int newScale); } diff --git a/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ExponentialHistogramGenerator.java b/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ExponentialHistogramGenerator.java index e74d36af58400..7699ea0cfb939 100644 --- a/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ExponentialHistogramGenerator.java +++ b/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ExponentialHistogramGenerator.java @@ -80,7 +80,7 @@ private void mergeValuesToHistogram() { for (int i = negativeValuesCount - 1; i >= 0; i--) { long count = 1; long index = 
computeIndex(rawValueBuffer[i], scale); - while ((i-1) >= 0 && computeIndex(rawValueBuffer[i-1] , scale) == index) { + while ((i - 1) >= 0 && computeIndex(rawValueBuffer[i - 1], scale) == index) { i--; count++; } @@ -88,14 +88,14 @@ private void mergeValuesToHistogram() { } int zeroCount = 0; - while((negativeValuesCount + zeroCount) < valueCount && rawValueBuffer[negativeValuesCount+zeroCount] == 0) { + while ((negativeValuesCount + zeroCount) < valueCount && rawValueBuffer[negativeValuesCount + zeroCount] == 0) { zeroCount++; } valueBuffer.setZeroBucket(ZeroBucket.minimalWithCount(zeroCount)); - for (int i= negativeValuesCount + zeroCount; i < valueCount; i++) { + for (int i = negativeValuesCount + zeroCount; i < valueCount; i++) { long count = 1; long index = computeIndex(rawValueBuffer[i], scale); - while ((i+1) < valueCount && computeIndex(rawValueBuffer[i+1] , scale) == index) { + while ((i + 1) < valueCount && computeIndex(rawValueBuffer[i + 1], scale) == index) { i++; count++; } @@ -106,5 +106,4 @@ private void mergeValuesToHistogram() { valueCount = 0; } - } diff --git a/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ExponentialHistogramMerger.java b/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ExponentialHistogramMerger.java index 33ac538aadcbd..0bbd3c3887772 100644 --- a/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ExponentialHistogramMerger.java +++ b/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ExponentialHistogramMerger.java @@ -61,7 +61,6 @@ public static ExponentialHistogram merge(int bucketCount, Stream> (64-scaleAdjustment) // = index << scaleAdjustment + SCALE_UP_64_OFFSET >> (64-scaleAdjustment) - return (index << scaleAdjustment) + (SCALE_UP_64_OFFSET >> (64-scaleAdjustment)); + return (index << scaleAdjustment) + (SCALE_UP_64_OFFSET >> (64 - scaleAdjustment)); } } @@ -49,26 +48,26 @@ static long 
adjustScale(long index, int scaleAdjustment) { * Equivalent to mathematically correct comparison of the lower bucket boundaries of the given buckets */ public static int compareLowerBoundaries(long idxA, int scaleA, long idxB, int scaleB) { - if (scaleA > scaleB) { - return -compareLowerBoundaries(idxB, scaleB, idxA, scaleA); - } - // scaleA <= scaleB - int shifts = scaleB - scaleA; - int maxScaleAdjustment = getMaximumScaleIncrease(idxA); - if (maxScaleAdjustment < shifts) { - // we would overflow if we adjust A to the scale of B - // so if A is negative, scaling would produce a number less than Long.MIN_VALUE, therefore it is definitely smaller than B - // if A is positive, scaling would produce a number bigger than Long.MAX_VALUE, therefore it is definitely bigger than B - // if A is zero => shifting and therefore scale adjustment would not have any effect - if (idxA == 0) { - return Long.compare(0, idxB); - } else { - return idxA < 0 ? -1 : +1; - } - } else { - long adjustedIdxA = idxA << shifts; - return Long.compare(adjustedIdxA, idxB); - } + if (scaleA > scaleB) { + return -compareLowerBoundaries(idxB, scaleB, idxA, scaleA); + } + // scaleA <= scaleB + int shifts = scaleB - scaleA; + int maxScaleAdjustment = getMaximumScaleIncrease(idxA); + if (maxScaleAdjustment < shifts) { + // we would overflow if we adjust A to the scale of B + // so if A is negative, scaling would produce a number less than Long.MIN_VALUE, therefore it is definitely smaller than B + // if A is positive, scaling would produce a number bigger than Long.MAX_VALUE, therefore it is definitely bigger than B + // if A is zero => shifting and therefore scale adjustment would not have any effect + if (idxA == 0) { + return Long.compare(0, idxB); + } else { + return idxA < 0 ? 
-1 : +1; + } + } else { + long adjustedIdxA = idxA << shifts; + return Long.compare(adjustedIdxA, idxB); + } } /** @@ -99,7 +98,7 @@ public static double getLowerBucketBoundary(long index, int scale) { public static double getPointOfLeastRelativeError(long bucketIndex, int scale) { // TODO: handle numeric limits, implement exact algorithms with 128 bit precision double inverseFactor = Math.pow(2, -scale); - return Math.pow(2, inverseFactor * (bucketIndex + 1/3.0)); + return Math.pow(2, inverseFactor * (bucketIndex + 1 / 3.0)); } /** diff --git a/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/MergingBucketIterator.java b/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/MergingBucketIterator.java index 37ff9201e556a..264a38e36883e 100644 --- a/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/MergingBucketIterator.java +++ b/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/MergingBucketIterator.java @@ -27,7 +27,7 @@ public MergingBucketIterator(ExponentialHistogram.BucketIterator itA, Exponentia @Override public void advance() { - boolean hasNextA = itA.hasNext() ; + boolean hasNextA = itA.hasNext(); boolean hasNextB = itB.hasNext(); endReached = hasNextA == false && hasNextB == false; if (endReached) { @@ -68,7 +68,6 @@ public long peekCount() { return currentCount; } - @Override public long peekIndex() { assertEndNotReached(); diff --git a/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ZeroBucket.java b/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ZeroBucket.java index ca9e514549cd8..952c295202849 100644 --- a/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ZeroBucket.java +++ b/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ZeroBucket.java @@ -10,13 +10,13 @@ package org.elasticsearch.exponentialhistogram; 
import static org.elasticsearch.exponentialhistogram.ExponentialHistogramUtils.compareLowerBoundaries; -import static org.elasticsearch.exponentialhistogram.ExponentialHistogramUtils.getLowerBucketBoundary; import static org.elasticsearch.exponentialhistogram.ExponentialHistogramUtils.computeIndex; +import static org.elasticsearch.exponentialhistogram.ExponentialHistogramUtils.getLowerBucketBoundary; import static org.elasticsearch.exponentialhistogram.FixedSizeExponentialHistogram.DEFAULT_BUCKET_SCALE; public record ZeroBucket(long index, int scale, long count) { - private static final ZeroBucket MINIMAL_EMPTY = new ZeroBucket(Long.MIN_VALUE, Integer.MIN_VALUE/256, 0); + private static final ZeroBucket MINIMAL_EMPTY = new ZeroBucket(Long.MIN_VALUE, Integer.MIN_VALUE / 256, 0); public ZeroBucket(double zeroThreshold, long count) { this(computeIndex(zeroThreshold, DEFAULT_BUCKET_SCALE) + 1, DEFAULT_BUCKET_SCALE, count); @@ -58,7 +58,7 @@ public ZeroBucket collapseOverlappingBuckets(ExponentialHistogram.BucketIterator ZeroBucket previous; do { previous = current; - for (ExponentialHistogram.BucketIterator buckets: bucketIterators) { + for (ExponentialHistogram.BucketIterator buckets : bucketIterators) { current = current.collapseOverlappingBuckets(buckets); } } while (previous.compareZeroThreshold(current) != 0); @@ -91,7 +91,7 @@ public ZeroBucket collapseOverlappingBuckets(ExponentialHistogram.BucketIterator } else { long newZeroCount = count + collapsedCount; // +1 because we need to adjust the zero threshold to the upper boundary of the collapsed bucket - long collapsedUpperBoundIndex = Math.addExact(highestCollapsedIndex , 1); + long collapsedUpperBoundIndex = Math.addExact(highestCollapsedIndex, 1); if (compareLowerBoundaries(index, scale, collapsedUpperBoundIndex, buckets.scale()) >= 0) { // we still have a larger zero-threshold than the largest collapsed bucket's upper boundary return new ZeroBucket(index, scale, newZeroCount); diff --git 
a/libs/exponential-histogram/src/test/java/org/elasticsearch/exponentialhistogram/ExponentialHistogramMergerTest.java b/libs/exponential-histogram/src/test/java/org/elasticsearch/exponentialhistogram/ExponentialHistogramMergerTest.java index 09cfa1294171e..77e1346d03d77 100644 --- a/libs/exponential-histogram/src/test/java/org/elasticsearch/exponentialhistogram/ExponentialHistogramMergerTest.java +++ b/libs/exponential-histogram/src/test/java/org/elasticsearch/exponentialhistogram/ExponentialHistogramMergerTest.java @@ -29,7 +29,7 @@ public void testZeroThresholdCollapsesOverlappingBuckets() { first.setZeroBucket(new ZeroBucket(2.0001, 10)); FixedSizeExponentialHistogram second = new FixedSizeExponentialHistogram(100); - first.resetBuckets(0); //scale 0 means base 2 + first.resetBuckets(0); // scale 0 means base 2 first.tryAddBucket(0, 1, false); // bucket (-2, 1] first.tryAddBucket(1, 1, false); // bucket (-4, 2] first.tryAddBucket(2, 7, false); // bucket (-8, 4] @@ -71,7 +71,7 @@ public void testZeroThresholdCollapsesOverlappingBuckets() { public void testEmptyZeroBucketIgnored() { FixedSizeExponentialHistogram first = new FixedSizeExponentialHistogram(100); first.setZeroBucket(new ZeroBucket(2.0, 10)); - first.resetBuckets(0); //scale 0 means base 2 + first.resetBuckets(0); // scale 0 means base 2 first.tryAddBucket(2, 42L, true); // bucket (4, 8] FixedSizeExponentialHistogram second = new FixedSizeExponentialHistogram(100); @@ -89,7 +89,6 @@ public void testEmptyZeroBucketIgnored() { assertThat(posBuckets.hasNext(), equalTo(false)); } - /** * Verify that the resulting histogram is independent of the order of elements and therefore merges performed. */ @@ -97,15 +96,15 @@ public void testMergeOrderIndependence() { Random rnd = new Random(42); List values = IntStream.range(0, 10_000) - .mapToDouble(i -> i<17 ? 0 : rnd.nextDouble() * Math.pow(10, rnd.nextLong()%4)) + .mapToDouble(i -> i < 17 ? 
0 : rnd.nextDouble() * Math.pow(10, rnd.nextLong() % 4)) .boxed() .collect(Collectors.toCollection(ArrayList::new)); ExponentialHistogram reference = ExponentialHistogramGenerator.createFor(20, values.stream().mapToDouble(Double::doubleValue)); - for (int i=0; i<100; i++) { + for (int i = 0; i < 100; i++) { Collections.shuffle(values, rnd); - ExponentialHistogram shuffled = ExponentialHistogramGenerator.createFor(20, values.stream().mapToDouble(Double::doubleValue)); + ExponentialHistogram shuffled = ExponentialHistogramGenerator.createFor(20, values.stream().mapToDouble(Double::doubleValue)); assertThat("Expected same scale", shuffled.scale(), equalTo(reference.scale())); assertThat("Expected same zero-bucket", shuffled.zeroBucket(), equalTo(reference.zeroBucket())); @@ -118,7 +117,7 @@ public void testMergeOrderIndependence() { private void assertBucketsEqual(ExponentialHistogram.BucketIterator itA, ExponentialHistogram.BucketIterator itB) { assertThat("Expecting both set of buckets to be emptry or non-empty", itA.hasNext(), equalTo(itB.hasNext())); - while(itA.hasNext() && itB.hasNext()) { + while (itA.hasNext() && itB.hasNext()) { assertThat(itA.peekIndex(), equalTo(itB.peekIndex())); assertThat(itA.peekCount(), equalTo(itB.peekCount())); assertThat("The number of buckets is different", itA.hasNext(), equalTo(itB.hasNext())); @@ -133,7 +132,4 @@ private static ExponentialHistogram mergeWithMinimumScale(int bucketCount, int s return merger.get(); } - - - } diff --git a/libs/exponential-histogram/src/test/java/org/elasticsearch/exponentialhistogram/ExponentialHistogramUtilsTest.java b/libs/exponential-histogram/src/test/java/org/elasticsearch/exponentialhistogram/ExponentialHistogramUtilsTest.java index 394a4aee10d06..13a7a61faf368 100644 --- a/libs/exponential-histogram/src/test/java/org/elasticsearch/exponentialhistogram/ExponentialHistogramUtilsTest.java +++ 
b/libs/exponential-histogram/src/test/java/org/elasticsearch/exponentialhistogram/ExponentialHistogramUtilsTest.java @@ -46,7 +46,7 @@ public void testMinValue() { public void testRandom() { Random rnd = new Random(42); - for (int i=0; i<100_000; i++) { + for (int i = 0; i < 100_000; i++) { long index = rnd.nextLong(); int maxScale = getMaximumScaleIncrease(index); @@ -56,17 +56,15 @@ public void testRandom() { } - public void testRandomComparison() { Random rnd = new Random(42); - for (int i=0; i<100_000; i++) { + for (int i = 0; i < 100_000; i++) { long indexA = rnd.nextLong(); long indexB = rnd.nextLong(); int scaleA = rnd.nextInt() % 40; int scaleB = rnd.nextInt() % 40; - double lowerBoundA = getLowerBucketBoundary(indexA, scaleA); while (Double.isInfinite(lowerBoundA)) { indexA = indexA >> 1; @@ -79,9 +77,8 @@ public void testRandomComparison() { } if (lowerBoundA != lowerBoundB) { - System.out.println("Comparing "+lowerBoundA+" to "+lowerBoundB); - assertThat( - Double.compare(lowerBoundA, lowerBoundB), equalTo(compareLowerBoundaries(indexA, scaleA, indexB, scaleB))); + System.out.println("Comparing " + lowerBoundA + " to " + lowerBoundB); + assertThat(Double.compare(lowerBoundA, lowerBoundB), equalTo(compareLowerBoundaries(indexA, scaleA, indexB, scaleB))); } } @@ -93,8 +90,8 @@ public void testScalingUpToMidpoint() { double upperBoundary = getUpperBucketBoundary(midpointIndex, 64); // due to limited double-float precision the results are actually exact - assertThat(lowerBoundary, equalTo(4.0/3.0)); - assertThat(upperBoundary, equalTo(4.0/3.0)); + assertThat(lowerBoundary, equalTo(4.0 / 3.0)); + assertThat(upperBoundary, equalTo(4.0 / 3.0)); } public void testSaneBucketBoundaries() { @@ -103,7 +100,7 @@ public void testSaneBucketBoundaries() { assertThat(getLowerBucketBoundary(1, -1), equalTo(4.0)); assertThat(getLowerBucketBoundary(1, -2), equalTo(16.0)); - double limit1 = getLowerBucketBoundary(Long.MAX_VALUE-1, 56); + double limit1 = 
getLowerBucketBoundary(Long.MAX_VALUE - 1, 56); double limit2 = getLowerBucketBoundary(Long.MAX_VALUE, 56); assertThat(limit1, lessThanOrEqualTo(limit2)); } diff --git a/libs/exponential-histogram/src/test/java/org/elasticsearch/exponentialhistogram/FixedSizeExponentialHistogramTest.java b/libs/exponential-histogram/src/test/java/org/elasticsearch/exponentialhistogram/FixedSizeExponentialHistogramTest.java index 81e9f75b35643..e79265613a8dd 100644 --- a/libs/exponential-histogram/src/test/java/org/elasticsearch/exponentialhistogram/FixedSizeExponentialHistogramTest.java +++ b/libs/exponential-histogram/src/test/java/org/elasticsearch/exponentialhistogram/FixedSizeExponentialHistogramTest.java @@ -15,7 +15,6 @@ public class FixedSizeExponentialHistogramTest extends ESTestCase { - public void testPrintBuckets() { ExponentialHistogram first = ExponentialHistogramGenerator.createFor(0.01234, 42, 56789); ExponentialHistogram second = ExponentialHistogramGenerator.createFor(38, 50, 250, 257, 10001.1234); @@ -24,7 +23,6 @@ public void testPrintBuckets() { printMidpoints(result); } - public void testPrintBucketsLinearScale() { ExponentialHistogram result = ExponentialHistogramGenerator.createFor( diff --git a/libs/exponential-histogram/src/test/java/org/elasticsearch/exponentialhistogram/PercentileAccuracyTest.java b/libs/exponential-histogram/src/test/java/org/elasticsearch/exponentialhistogram/PercentileAccuracyTest.java index d6d019d88f226..39923a38c8841 100644 --- a/libs/exponential-histogram/src/test/java/org/elasticsearch/exponentialhistogram/PercentileAccuracyTest.java +++ b/libs/exponential-histogram/src/test/java/org/elasticsearch/exponentialhistogram/PercentileAccuracyTest.java @@ -35,77 +35,49 @@ public class PercentileAccuracyTest extends ESTestCase { * Test percentile accuracy with uniform distribution */ public void testUniformDistribution() { - testDistributionPercentileAccuracy( - new UniformRealDistribution(new Well19937c(42), 0, 100), - 50000, - 500 - ); 
+ testDistributionPercentileAccuracy(new UniformRealDistribution(new Well19937c(42), 0, 100), 50000, 500); } /** * Test percentile accuracy with normal distribution */ public void testNormalDistribution() { - testDistributionPercentileAccuracy( - new NormalDistribution(new Well19937c(42), 100, 15), - 50000, - 500 - ); + testDistributionPercentileAccuracy(new NormalDistribution(new Well19937c(42), 100, 15), 50000, 500); } /** * Test percentile accuracy with exponential distribution */ public void testExponentialDistribution() { - testDistributionPercentileAccuracy( - new ExponentialDistribution(new Well19937c(42), 10), - 50000, - 500 - ); + testDistributionPercentileAccuracy(new ExponentialDistribution(new Well19937c(42), 10), 50000, 500); } /** * Test percentile accuracy with log-normal distribution */ public void testLogNormalDistribution() { - testDistributionPercentileAccuracy( - new LogNormalDistribution(new Well19937c(42), 0, 1), - 50000, - 500 - ); + testDistributionPercentileAccuracy(new LogNormalDistribution(new Well19937c(42), 0, 1), 50000, 500); } /** * Test percentile accuracy with gamma distribution */ public void testGammaDistribution() { - testDistributionPercentileAccuracy( - new GammaDistribution(new Well19937c(42), 2, 5), - 50000, - 500 - ); + testDistributionPercentileAccuracy(new GammaDistribution(new Well19937c(42), 2, 5), 50000, 500); } /** * Test percentile accuracy with beta distribution */ public void testBetaDistribution() { - testDistributionPercentileAccuracy( - new BetaDistribution(new Well19937c(42), 2, 5), - 50000, - 500 - ); + testDistributionPercentileAccuracy(new BetaDistribution(new Well19937c(42), 2, 5), 50000, 500); } /** * Test percentile accuracy with Weibull distribution */ public void testWeibullDistribution() { - testDistributionPercentileAccuracy( - new WeibullDistribution(new Well19937c(42), 2, 5), - 50000, - 500 - ); + testDistributionPercentileAccuracy(new WeibullDistribution(new Well19937c(42), 2, 5), 50000, 500); } /** 
From 66b5e2cbcc13c162ae65602116617a1b71acf117 Mon Sep 17 00:00:00 2001 From: Jonas Kunz Date: Thu, 10 Jul 2025 11:28:18 +0200 Subject: [PATCH 10/32] Fix tests, implement benchmarks --- .../ExponentialHistogramGenerationBench.java | 2 +- .../ExponentialHistogramMergeBench.java | 112 ++++++++++++++++++ ...tatsTest.java => DownscaleStatsTests.java} | 2 +- ...a => ExponentialHistogramMergerTests.java} | 4 +- ...va => ExponentialHistogramUtilsTests.java} | 2 +- ...> FixedSizeExponentialHistogramTests.java} | 2 +- ...Test.java => PercentileAccuracyTests.java} | 4 +- 7 files changed, 120 insertions(+), 8 deletions(-) create mode 100644 benchmarks/src/main/java/org/elasticsearch/benchmark/exponentialhistogram/ExponentialHistogramMergeBench.java rename libs/exponential-histogram/src/test/java/org/elasticsearch/exponentialhistogram/{DownscaleStatsTest.java => DownscaleStatsTests.java} (97%) rename libs/exponential-histogram/src/test/java/org/elasticsearch/exponentialhistogram/{ExponentialHistogramMergerTest.java => ExponentialHistogramMergerTests.java} (98%) rename libs/exponential-histogram/src/test/java/org/elasticsearch/exponentialhistogram/{ExponentialHistogramUtilsTest.java => ExponentialHistogramUtilsTests.java} (98%) rename libs/exponential-histogram/src/test/java/org/elasticsearch/exponentialhistogram/{FixedSizeExponentialHistogramTest.java => FixedSizeExponentialHistogramTests.java} (97%) rename libs/exponential-histogram/src/test/java/org/elasticsearch/exponentialhistogram/{PercentileAccuracyTest.java => PercentileAccuracyTests.java} (99%) diff --git a/benchmarks/src/main/java/org/elasticsearch/benchmark/exponentialhistogram/ExponentialHistogramGenerationBench.java b/benchmarks/src/main/java/org/elasticsearch/benchmark/exponentialhistogram/ExponentialHistogramGenerationBench.java index 5aaf81f64c6f4..4fe509e200aae 100644 --- a/benchmarks/src/main/java/org/elasticsearch/benchmark/exponentialhistogram/ExponentialHistogramGenerationBench.java +++ 
b/benchmarks/src/main/java/org/elasticsearch/benchmark/exponentialhistogram/ExponentialHistogramGenerationBench.java @@ -43,7 +43,7 @@ @State(Scope.Thread) public class ExponentialHistogramGenerationBench { - @Param({ "100", "500", "1000", "5000" }) + @Param({ "100", "500", "1000", "5000" , "10000", "20000"}) int bucketCount; @Param({ "NORMAL", "GAUSSIAN" }) diff --git a/benchmarks/src/main/java/org/elasticsearch/benchmark/exponentialhistogram/ExponentialHistogramMergeBench.java b/benchmarks/src/main/java/org/elasticsearch/benchmark/exponentialhistogram/ExponentialHistogramMergeBench.java new file mode 100644 index 0000000000000..b14501855a303 --- /dev/null +++ b/benchmarks/src/main/java/org/elasticsearch/benchmark/exponentialhistogram/ExponentialHistogramMergeBench.java @@ -0,0 +1,112 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the "Elastic License + * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side + * Public License v 1"; you may not use this file except in compliance with, at + * your election, the "Elastic License 2.0", the "GNU Affero General Public + * License v3.0 only", or the "Server Side Public License, v 1". 
+ */ + +package org.elasticsearch.benchmark.exponentialhistogram; + +import org.elasticsearch.exponentialhistogram.ExponentialHistogram; +import org.elasticsearch.exponentialhistogram.ExponentialHistogramGenerator; +import org.elasticsearch.exponentialhistogram.ExponentialHistogramMerger; +import org.openjdk.jmh.annotations.Benchmark; +import org.openjdk.jmh.annotations.BenchmarkMode; +import org.openjdk.jmh.annotations.Fork; +import org.openjdk.jmh.annotations.Measurement; +import org.openjdk.jmh.annotations.Mode; +import org.openjdk.jmh.annotations.OutputTimeUnit; +import org.openjdk.jmh.annotations.Param; +import org.openjdk.jmh.annotations.Scope; +import org.openjdk.jmh.annotations.Setup; +import org.openjdk.jmh.annotations.State; +import org.openjdk.jmh.annotations.Threads; +import org.openjdk.jmh.annotations.Warmup; + +import java.util.List; +import java.util.Random; +import java.util.concurrent.ThreadLocalRandom; +import java.util.concurrent.TimeUnit; + +@BenchmarkMode(Mode.AverageTime) +@OutputTimeUnit(TimeUnit.NANOSECONDS) +@Warmup(iterations = 3, time = 3, timeUnit = TimeUnit.SECONDS) +@Measurement(iterations = 5, time = 2, timeUnit = TimeUnit.SECONDS) +@Fork(1) +@Threads(1) +@State(Scope.Thread) +public class ExponentialHistogramMergeBench { + + @Param({ "1000", "5000" }) + int bucketCount; + + @Param({ "0.01", "0.1", "0.25", "0.5", "1.0", "2.0" }) + double mergedHistoSizeFactor; + + Random random; + ExponentialHistogramMerger histoMerger; + + ExponentialHistogram[] toMerge = new ExponentialHistogram[10_000]; + + @Setup + public void setUp() { + random = ThreadLocalRandom.current(); + histoMerger = new ExponentialHistogramMerger(bucketCount); + + ExponentialHistogramGenerator initial = new ExponentialHistogramGenerator(bucketCount); + for (int j = 0; j < bucketCount; j++) { + initial.add(Math.pow(1.001, j)); + } + ExponentialHistogram initialHisto = initial.get(); + int cnt = getBucketCount(initialHisto); + if (cnt < bucketCount) { + throw new 
IllegalArgumentException("Expected bucket count to be " + bucketCount + ", but was " + cnt); + } + histoMerger.add(initialHisto); + + int dataPointSize = (int) Math.round(bucketCount * mergedHistoSizeFactor); + + for (int i = 0; i < toMerge.length; i++) { + ExponentialHistogramGenerator generator = new ExponentialHistogramGenerator(dataPointSize); + + int bucketIndex = 0; + for (int j = 0; j < dataPointSize; j++) { + bucketIndex += 1 + random.nextInt(bucketCount) % (Math.max(1, bucketCount / dataPointSize)); + generator.add(Math.pow(1.001, bucketIndex)); + } + toMerge[i] = generator.get(); + cnt = getBucketCount(toMerge[i]); + if (cnt < dataPointSize) { + throw new IllegalArgumentException("Expected bucket count to be " + dataPointSize + ", but was " + cnt); + } + } + } + + private static int getBucketCount(ExponentialHistogram histo) { + int cnt = 0; + for (ExponentialHistogram.BucketIterator it : List.of(histo.negativeBuckets(), histo.positiveBuckets())) { + while (it.hasNext()) { + cnt++; + it.advance(); + } + } + return cnt; + } + + @State(Scope.Thread) + public static class ThreadState { + int index = 0; + } + + @Benchmark + @BenchmarkMode(Mode.AverageTime) + @OutputTimeUnit(TimeUnit.MICROSECONDS) + public void add(ThreadState state) { + if (state.index >= toMerge.length) { + state.index = 0; + } + histoMerger.add(toMerge[state.index++]); + } +} diff --git a/libs/exponential-histogram/src/test/java/org/elasticsearch/exponentialhistogram/DownscaleStatsTest.java b/libs/exponential-histogram/src/test/java/org/elasticsearch/exponentialhistogram/DownscaleStatsTests.java similarity index 97% rename from libs/exponential-histogram/src/test/java/org/elasticsearch/exponentialhistogram/DownscaleStatsTest.java rename to libs/exponential-histogram/src/test/java/org/elasticsearch/exponentialhistogram/DownscaleStatsTests.java index 82446ccdf2b09..043afc26aa1fb 100644 --- 
a/libs/exponential-histogram/src/test/java/org/elasticsearch/exponentialhistogram/DownscaleStatsTest.java +++ b/libs/exponential-histogram/src/test/java/org/elasticsearch/exponentialhistogram/DownscaleStatsTests.java @@ -22,7 +22,7 @@ import static org.hamcrest.MatcherAssert.assertThat; import static org.hamcrest.Matchers.equalTo; -public class DownscaleStatsTest extends ESTestCase { +public class DownscaleStatsTests extends ESTestCase { public void testExponential() { long[] values = IntStream.range(0, 100).mapToLong(i -> (long) Math.min(Integer.MAX_VALUE, Math.pow(1.1, i))).distinct().toArray(); diff --git a/libs/exponential-histogram/src/test/java/org/elasticsearch/exponentialhistogram/ExponentialHistogramMergerTest.java b/libs/exponential-histogram/src/test/java/org/elasticsearch/exponentialhistogram/ExponentialHistogramMergerTests.java similarity index 98% rename from libs/exponential-histogram/src/test/java/org/elasticsearch/exponentialhistogram/ExponentialHistogramMergerTest.java rename to libs/exponential-histogram/src/test/java/org/elasticsearch/exponentialhistogram/ExponentialHistogramMergerTests.java index 77e1346d03d77..e28fc650d95f1 100644 --- a/libs/exponential-histogram/src/test/java/org/elasticsearch/exponentialhistogram/ExponentialHistogramMergerTest.java +++ b/libs/exponential-histogram/src/test/java/org/elasticsearch/exponentialhistogram/ExponentialHistogramMergerTests.java @@ -19,10 +19,10 @@ import java.util.stream.Collectors; import java.util.stream.IntStream; -import static org.elasticsearch.exponentialhistogram.FixedSizeExponentialHistogramTest.printMidpoints; +import static org.elasticsearch.exponentialhistogram.FixedSizeExponentialHistogramTests.printMidpoints; import static org.hamcrest.Matchers.equalTo; -public class ExponentialHistogramMergerTest extends ESTestCase { +public class ExponentialHistogramMergerTests extends ESTestCase { public void testZeroThresholdCollapsesOverlappingBuckets() { FixedSizeExponentialHistogram first = new 
FixedSizeExponentialHistogram(100); diff --git a/libs/exponential-histogram/src/test/java/org/elasticsearch/exponentialhistogram/ExponentialHistogramUtilsTest.java b/libs/exponential-histogram/src/test/java/org/elasticsearch/exponentialhistogram/ExponentialHistogramUtilsTests.java similarity index 98% rename from libs/exponential-histogram/src/test/java/org/elasticsearch/exponentialhistogram/ExponentialHistogramUtilsTest.java rename to libs/exponential-histogram/src/test/java/org/elasticsearch/exponentialhistogram/ExponentialHistogramUtilsTests.java index 13a7a61faf368..a9936d56ff02e 100644 --- a/libs/exponential-histogram/src/test/java/org/elasticsearch/exponentialhistogram/ExponentialHistogramUtilsTest.java +++ b/libs/exponential-histogram/src/test/java/org/elasticsearch/exponentialhistogram/ExponentialHistogramUtilsTests.java @@ -21,7 +21,7 @@ import static org.hamcrest.Matchers.equalTo; import static org.hamcrest.Matchers.lessThanOrEqualTo; -public class ExponentialHistogramUtilsTest extends ESTestCase { +public class ExponentialHistogramUtilsTests extends ESTestCase { public void testMaxValue() { assertThat(getMaximumScaleIncrease(Long.MAX_VALUE), equalTo(0)); diff --git a/libs/exponential-histogram/src/test/java/org/elasticsearch/exponentialhistogram/FixedSizeExponentialHistogramTest.java b/libs/exponential-histogram/src/test/java/org/elasticsearch/exponentialhistogram/FixedSizeExponentialHistogramTests.java similarity index 97% rename from libs/exponential-histogram/src/test/java/org/elasticsearch/exponentialhistogram/FixedSizeExponentialHistogramTest.java rename to libs/exponential-histogram/src/test/java/org/elasticsearch/exponentialhistogram/FixedSizeExponentialHistogramTests.java index e79265613a8dd..72e2fe6702731 100644 --- a/libs/exponential-histogram/src/test/java/org/elasticsearch/exponentialhistogram/FixedSizeExponentialHistogramTest.java +++ 
b/libs/exponential-histogram/src/test/java/org/elasticsearch/exponentialhistogram/FixedSizeExponentialHistogramTests.java @@ -13,7 +13,7 @@ import java.util.stream.IntStream; -public class FixedSizeExponentialHistogramTest extends ESTestCase { +public class FixedSizeExponentialHistogramTests extends ESTestCase { public void testPrintBuckets() { ExponentialHistogram first = ExponentialHistogramGenerator.createFor(0.01234, 42, 56789); diff --git a/libs/exponential-histogram/src/test/java/org/elasticsearch/exponentialhistogram/PercentileAccuracyTest.java b/libs/exponential-histogram/src/test/java/org/elasticsearch/exponentialhistogram/PercentileAccuracyTests.java similarity index 99% rename from libs/exponential-histogram/src/test/java/org/elasticsearch/exponentialhistogram/PercentileAccuracyTest.java rename to libs/exponential-histogram/src/test/java/org/elasticsearch/exponentialhistogram/PercentileAccuracyTests.java index 39923a38c8841..cf6eba4976ee3 100644 --- a/libs/exponential-histogram/src/test/java/org/elasticsearch/exponentialhistogram/PercentileAccuracyTest.java +++ b/libs/exponential-histogram/src/test/java/org/elasticsearch/exponentialhistogram/PercentileAccuracyTests.java @@ -27,7 +27,7 @@ import static org.hamcrest.Matchers.closeTo; import static org.hamcrest.Matchers.lessThan; -public class PercentileAccuracyTest extends ESTestCase { +public class PercentileAccuracyTests extends ESTestCase { public static final double[] PERCENTILES_TO_TEST = { 0, 0.01, 0.1, 0.25, 0.5, 0.75, 0.9, 0.95, 0.99, 1.0 }; @@ -254,7 +254,7 @@ private double getMaximumRelativeError(double[] values, int bucketCount) { // only positive values double gammaSquare = Math.pow(largestPositive / smallestPositive, 2.0 / (bucketCount)); return (gammaSquare - 1) / (gammaSquare + 1); - } else if (largestAbsNegative == 0) { + } else if (smallestAbsNegative == 0) { // only negative values double gammaSquare = Math.pow(largestAbsNegative / smallestAbsNegative, 2.0 / (bucketCount)); return 
(gammaSquare - 1) / (gammaSquare + 1); From 2f293d01a4907e917c387d26e47861fc45a8d63a Mon Sep 17 00:00:00 2001 From: Jonas Kunz Date: Thu, 10 Jul 2025 15:06:19 +0200 Subject: [PATCH 11/32] Reduce max scale to preserve numeric accuracy --- .../ExponentialHistogramUtils.java | 35 ++++++------------- .../FixedSizeExponentialHistogram.java | 6 ++-- .../ExponentialHistogramGeneratorTests.java | 33 +++++++++++++++++ 3 files changed, 46 insertions(+), 28 deletions(-) create mode 100644 libs/exponential-histogram/src/test/java/org/elasticsearch/exponentialhistogram/ExponentialHistogramGeneratorTests.java diff --git a/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ExponentialHistogramUtils.java b/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ExponentialHistogramUtils.java index f2036c2d055dc..cb9c2ed75fa25 100644 --- a/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ExponentialHistogramUtils.java +++ b/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ExponentialHistogramUtils.java @@ -29,7 +29,8 @@ public class ExponentialHistogramUtils { /** The number of bits used to represent the exponent of IEEE 754 double precision number. 
*/ private static final int EXPONENT_WIDTH = 11; - private static final double LOG_BASE2_E = 1D / Math.log(2); + private static final double LN_2 = Math.log(2); + private static final double LOG_BASE2_E = 1D / LN_2; // Magic number, computed via log(4/3)/log(2^(2^-64)), but exact private static final long SCALE_UP_64_OFFSET = 7656090530189244512L; @@ -82,35 +83,21 @@ public static int getMaximumScaleIncrease(long index) { } public static double getUpperBucketBoundary(long index, int scale) { - long nextIndex = index; - if (index < Long.MAX_VALUE) { - nextIndex++; - } - return getLowerBucketBoundary(nextIndex, scale); + return getLowerBucketBoundary(index + 1, scale); } public static double getLowerBucketBoundary(long index, int scale) { - // TODO: handle numeric limits, implement exact algorithms with 128 bit precision - double inverseFactor = Math.pow(2, -scale); - return Math.pow(2, inverseFactor * index); + // TODO: handle numeric limits, implement by splitting the index into two 32 bit integers + double inverseFactor = Math.scalb(LN_2, -scale); + return Math.exp(inverseFactor * index); } public static double getPointOfLeastRelativeError(long bucketIndex, int scale) { // TODO: handle numeric limits, implement exact algorithms with 128 bit precision - double inverseFactor = Math.pow(2, -scale); - return Math.pow(2, inverseFactor * (bucketIndex + 1 / 3.0)); + double inverseFactor = Math.scalb(LN_2, -scale); + return Math.exp(inverseFactor * (bucketIndex + 1/3.0)); } - /** - * Compute the index for the given value. - * - *

The algorithm to retrieve the index is specified in the OpenTelemetry - * specification. - * - * @param value Measured value (must be non-zero). - * @return the index of the bucket which the value maps to. - */ static long computeIndex(double value, int scale) { double absValue = Math.abs(value); // For positive scales, compute the index by logarithm, which is simpler but may be @@ -135,7 +122,8 @@ static long computeIndex(double value, int scale) { * Scales: Use the Logarithm Function */ private static long getIndexByLogarithm(double value, int scale) { - return (long) Math.ceil(Math.log(value) * computeScaleFactor(scale)) - 1; + double scaleFactor = Math.scalb(LOG_BASE2_E, scale); + return (long) Math.ceil(Math.scalb(Math.log(value) * LOG_BASE2_E, scale)) - 1; } /** @@ -159,7 +147,4 @@ private static long mapToIndexScaleZero(double value) { return ieeeExponent; } - private static double computeScaleFactor(int scale) { - return Math.scalb(LOG_BASE2_E, scale); - } } diff --git a/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/FixedSizeExponentialHistogram.java b/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/FixedSizeExponentialHistogram.java index 2b36b6d2d1412..a6e78b0b4a4cd 100644 --- a/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/FixedSizeExponentialHistogram.java +++ b/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/FixedSizeExponentialHistogram.java @@ -11,9 +11,9 @@ public final class FixedSizeExponentialHistogram implements ExponentialHistogramBuilder, ExponentialHistogram { - // scale of 52 is the largest scale being able to represent the smallest and largest double numbers - // while giving a relative error less - public static final int DEFAULT_BUCKET_SCALE = 52; + // scale of 38 is the largest scale where the index computation doesn't suffer much rounding + // if we want to use something larger, we'll have to rework the 
math + public static final int DEFAULT_BUCKET_SCALE = 38; private final long[] bucketIndices; private final long[] bucketCounts; diff --git a/libs/exponential-histogram/src/test/java/org/elasticsearch/exponentialhistogram/ExponentialHistogramGeneratorTests.java b/libs/exponential-histogram/src/test/java/org/elasticsearch/exponentialhistogram/ExponentialHistogramGeneratorTests.java new file mode 100644 index 0000000000000..52f17a9f42645 --- /dev/null +++ b/libs/exponential-histogram/src/test/java/org/elasticsearch/exponentialhistogram/ExponentialHistogramGeneratorTests.java @@ -0,0 +1,33 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the "Elastic License + * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side + * Public License v 1"; you may not use this file except in compliance with, at + * your election, the "Elastic License 2.0", the "GNU Affero General Public + * License v3.0 only", or the "Server Side Public License, v 1". 
+ */ + +package org.elasticsearch.exponentialhistogram; + +import org.elasticsearch.test.ESTestCase; + +import static org.hamcrest.Matchers.greaterThanOrEqualTo; +import static org.hamcrest.Matchers.lessThanOrEqualTo; + +public class ExponentialHistogramGeneratorTests extends ESTestCase { + + public void testVeryLargeValue() { + double value = Double.MAX_VALUE/10; + ExponentialHistogram histo = ExponentialHistogramGenerator.createFor(value); + + long index = histo.positiveBuckets().peekIndex(); + int scale = histo.scale(); + + double lowerBound = ExponentialHistogramUtils.getLowerBucketBoundary(index, scale); + double upperBound = ExponentialHistogramUtils.getUpperBucketBoundary(index, scale); + + assertThat("Lower bucket boundary should be smaller than value", lowerBound, lessThanOrEqualTo(value)); + assertThat("Upper bucket boundary should be greater than value", upperBound, greaterThanOrEqualTo(value)); + } + +} From 91193bc5f73ed3ce002956aea44469550eabed0a Mon Sep 17 00:00:00 2001 From: Jonas Kunz Date: Thu, 10 Jul 2025 15:41:38 +0200 Subject: [PATCH 12/32] Check for sane scale and indices --- .../ExponentialHistogram.java | 12 +++++ .../ExponentialHistogramUtils.java | 10 +++- .../FixedSizeExponentialHistogram.java | 12 +++-- .../exponentialhistogram/ZeroBucket.java | 7 +-- .../ExponentialHistogramMergerTests.java | 3 +- .../ExponentialHistogramUtilsTests.java | 50 +++++++++---------- .../FixedSizeExponentialHistogramTests.java | 8 +++ 7 files changed, 66 insertions(+), 36 deletions(-) diff --git a/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ExponentialHistogram.java b/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ExponentialHistogram.java index 49ee0af339399..f1a1c867d2177 100644 --- a/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ExponentialHistogram.java +++ 
b/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ExponentialHistogram.java @@ -11,6 +11,18 @@ public interface ExponentialHistogram { + // scale of 38 is the largest scale where at the borders we don't run into problems due to floating point precision + // theoretically, a MAX_SCALE of 51 would work and would still cover the entire range of double values + // if we want to use something larger, we'll have to rework the math of converting from double to indices and back + // One option would be to use "Quadruple": https://github.com/m-vokhm/Quadruple + int MAX_SCALE = 38; + + // Only use 62 bit at max to allow to compute the difference between the smallest and largest index without causing overflow + // Also the extra bit gives us room for some tricks for compact storage + int MAX_INDEX_BITS = 62; + long MAX_INDEX = (1L << MAX_INDEX_BITS) - 1; + long MIN_INDEX = -MAX_INDEX; + int scale(); ZeroBucket zeroBucket(); diff --git a/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ExponentialHistogramUtils.java b/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ExponentialHistogramUtils.java index cb9c2ed75fa25..57a9968a5f444 100644 --- a/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ExponentialHistogramUtils.java +++ b/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ExponentialHistogramUtils.java @@ -9,6 +9,10 @@ package org.elasticsearch.exponentialhistogram; +import static org.elasticsearch.exponentialhistogram.ExponentialHistogram.MAX_INDEX; +import static org.elasticsearch.exponentialhistogram.ExponentialHistogram.MAX_INDEX_BITS; +import static org.elasticsearch.exponentialhistogram.ExponentialHistogram.MIN_INDEX; + public class ExponentialHistogramUtils { /** Bit mask used to isolate exponent of IEEE 754 double precision number. 
*/ @@ -76,10 +80,13 @@ public static int compareLowerBoundaries(long idxA, int scaleA, long idxB, int s * of the index. */ public static int getMaximumScaleIncrease(long index) { + if (index < MIN_INDEX || index > MAX_INDEX) { + throw new IllegalArgumentException("index must be in range ["+MIN_INDEX+".."+MAX_INDEX+"]"); + } if (index < 0) { index = ~index; } - return Long.numberOfLeadingZeros(index) - 1; + return Long.numberOfLeadingZeros(index) - (64 - MAX_INDEX_BITS); } public static double getUpperBucketBoundary(long index, int scale) { @@ -93,7 +100,6 @@ public static double getLowerBucketBoundary(long index, int scale) { } public static double getPointOfLeastRelativeError(long bucketIndex, int scale) { - // TODO: handle numeric limits, implement exact algorithms with 128 bit precision double inverseFactor = Math.scalb(LN_2, -scale); return Math.exp(inverseFactor * (bucketIndex + 1/3.0)); } diff --git a/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/FixedSizeExponentialHistogram.java b/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/FixedSizeExponentialHistogram.java index a6e78b0b4a4cd..6d182666ccd2f 100644 --- a/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/FixedSizeExponentialHistogram.java +++ b/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/FixedSizeExponentialHistogram.java @@ -11,10 +11,6 @@ public final class FixedSizeExponentialHistogram implements ExponentialHistogramBuilder, ExponentialHistogram { - // scale of 38 is the largest scale where the index computation doesn't suffer much rounding - // if we want to use something larger, we'll have to rework the math - public static final int DEFAULT_BUCKET_SCALE = 38; - private final long[] bucketIndices; private final long[] bucketCounts; private int negativeBucketCount; @@ -32,11 +28,14 @@ public FixedSizeExponentialHistogram(int bucketCount) { void reset() { 
setZeroBucket(ZeroBucket.minimalEmpty()); - resetBuckets(DEFAULT_BUCKET_SCALE); + resetBuckets(MAX_SCALE); } @Override public void resetBuckets(int newScale) { + if (newScale > MAX_SCALE) { + throw new IllegalArgumentException("scale must be <= MAX_SCALE ("+MAX_SCALE+")"); + } negativeBucketCount = 0; positiveBucketCount = 0; bucketScale = newScale; @@ -54,6 +53,9 @@ public void setZeroBucket(ZeroBucket zeroBucket) { @Override public boolean tryAddBucket(long index, long count, boolean isPositive) { + if (index < MIN_INDEX || index > MAX_INDEX) { + throw new IllegalArgumentException("index must be in range ["+MIN_INDEX+".."+MAX_INDEX+"]"); + } if (isPositive == false && positiveBucketCount > 0) { throw new IllegalArgumentException("Cannot add negative buckets after a positive bucket was added"); } diff --git a/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ZeroBucket.java b/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ZeroBucket.java index 952c295202849..f1b16b5a21dbe 100644 --- a/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ZeroBucket.java +++ b/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ZeroBucket.java @@ -9,17 +9,18 @@ package org.elasticsearch.exponentialhistogram; +import static org.elasticsearch.exponentialhistogram.ExponentialHistogram.MIN_INDEX; import static org.elasticsearch.exponentialhistogram.ExponentialHistogramUtils.compareLowerBoundaries; import static org.elasticsearch.exponentialhistogram.ExponentialHistogramUtils.computeIndex; import static org.elasticsearch.exponentialhistogram.ExponentialHistogramUtils.getLowerBucketBoundary; -import static org.elasticsearch.exponentialhistogram.FixedSizeExponentialHistogram.DEFAULT_BUCKET_SCALE; +import static org.elasticsearch.exponentialhistogram.ExponentialHistogram.MAX_SCALE; public record ZeroBucket(long index, int scale, long count) { - private static final 
ZeroBucket MINIMAL_EMPTY = new ZeroBucket(Long.MIN_VALUE, Integer.MIN_VALUE / 256, 0); + private static final ZeroBucket MINIMAL_EMPTY = new ZeroBucket(MIN_INDEX, Integer.MIN_VALUE / 256, 0); public ZeroBucket(double zeroThreshold, long count) { - this(computeIndex(zeroThreshold, DEFAULT_BUCKET_SCALE) + 1, DEFAULT_BUCKET_SCALE, count); + this(computeIndex(zeroThreshold, MAX_SCALE) + 1, MAX_SCALE, count); } public static ZeroBucket minimalEmpty() { diff --git a/libs/exponential-histogram/src/test/java/org/elasticsearch/exponentialhistogram/ExponentialHistogramMergerTests.java b/libs/exponential-histogram/src/test/java/org/elasticsearch/exponentialhistogram/ExponentialHistogramMergerTests.java index e28fc650d95f1..b2385003674ad 100644 --- a/libs/exponential-histogram/src/test/java/org/elasticsearch/exponentialhistogram/ExponentialHistogramMergerTests.java +++ b/libs/exponential-histogram/src/test/java/org/elasticsearch/exponentialhistogram/ExponentialHistogramMergerTests.java @@ -20,6 +20,7 @@ import java.util.stream.IntStream; import static org.elasticsearch.exponentialhistogram.FixedSizeExponentialHistogramTests.printMidpoints; +import static org.hamcrest.Matchers.closeTo; import static org.hamcrest.Matchers.equalTo; public class ExponentialHistogramMergerTests extends ESTestCase { @@ -62,7 +63,7 @@ public void testZeroThresholdCollapsesOverlappingBuckets() { third.setZeroBucket(new ZeroBucket(45.0, 1)); mergeResult = mergeWithMinimumScale(100, 0, mergeResult, third); - assertThat(mergeResult.zeroBucket().zeroThreshold(), equalTo(45.0)); + assertThat(mergeResult.zeroBucket().zeroThreshold(), closeTo(45.0, 0.000001)); assertThat(mergeResult.zeroBucket().count(), equalTo(1L + 14L + 42L + 7L)); assertThat(mergeResult.positiveBuckets().hasNext(), equalTo(false)); assertThat(mergeResult.negativeBuckets().hasNext(), equalTo(false)); diff --git a/libs/exponential-histogram/src/test/java/org/elasticsearch/exponentialhistogram/ExponentialHistogramUtilsTests.java 
b/libs/exponential-histogram/src/test/java/org/elasticsearch/exponentialhistogram/ExponentialHistogramUtilsTests.java index a9936d56ff02e..5b1d8154c80cf 100644 --- a/libs/exponential-histogram/src/test/java/org/elasticsearch/exponentialhistogram/ExponentialHistogramUtilsTests.java +++ b/libs/exponential-histogram/src/test/java/org/elasticsearch/exponentialhistogram/ExponentialHistogramUtilsTests.java @@ -13,57 +13,59 @@ import java.util.Random; +import static org.elasticsearch.exponentialhistogram.ExponentialHistogram.MAX_INDEX; +import static org.elasticsearch.exponentialhistogram.ExponentialHistogram.MAX_SCALE; +import static org.elasticsearch.exponentialhistogram.ExponentialHistogram.MIN_INDEX; import static org.elasticsearch.exponentialhistogram.ExponentialHistogramUtils.adjustScale; import static org.elasticsearch.exponentialhistogram.ExponentialHistogramUtils.compareLowerBoundaries; import static org.elasticsearch.exponentialhistogram.ExponentialHistogramUtils.getLowerBucketBoundary; import static org.elasticsearch.exponentialhistogram.ExponentialHistogramUtils.getMaximumScaleIncrease; import static org.elasticsearch.exponentialhistogram.ExponentialHistogramUtils.getUpperBucketBoundary; import static org.hamcrest.Matchers.equalTo; +import static org.hamcrest.Matchers.greaterThan; +import static org.hamcrest.Matchers.lessThan; import static org.hamcrest.Matchers.lessThanOrEqualTo; public class ExponentialHistogramUtilsTests extends ESTestCase { public void testMaxValue() { - assertThat(getMaximumScaleIncrease(Long.MAX_VALUE), equalTo(0)); - assertThat(getMaximumScaleIncrease(Long.MAX_VALUE >> 1), equalTo(1)); + assertThat(getMaximumScaleIncrease(MAX_INDEX), equalTo(0)); + assertThat(getMaximumScaleIncrease(MAX_INDEX >> 1), equalTo(1)); - assertThat(adjustScale(Long.MAX_VALUE, 0), equalTo(Long.MAX_VALUE)); - assertThat(adjustScale(Long.MAX_VALUE >> 1, 1), equalTo(Long.MAX_VALUE - 1)); - assertThat(adjustScale(Long.MAX_VALUE >> 2, 2), equalTo((Long.MAX_VALUE & 
~3) + 1)); - assertThat(adjustScale(Long.MAX_VALUE >> 4, 4), equalTo((Long.MAX_VALUE & ~15) + 6)); + assertThrows(ArithmeticException.class, () -> Math.multiplyExact(MAX_INDEX, 4)); } public void testMinValue() { - assertThat(getMaximumScaleIncrease(Long.MIN_VALUE), equalTo(0)); - assertThat(getMaximumScaleIncrease(Long.MIN_VALUE >> 1), equalTo(1)); - - assertThat(adjustScale(Long.MIN_VALUE, 0), equalTo(Long.MIN_VALUE)); - assertThat(adjustScale(Long.MIN_VALUE >> 1, 1), equalTo(Long.MIN_VALUE)); - assertThat(adjustScale(Long.MIN_VALUE >> 2, 2), equalTo((Long.MIN_VALUE & ~3) + 1)); - assertThat(adjustScale(Long.MIN_VALUE >> 4, 4), equalTo((Long.MIN_VALUE & ~15) + 6)); + assertThat(getMaximumScaleIncrease(MIN_INDEX), equalTo(0)); + assertThat(getMaximumScaleIncrease(MIN_INDEX >> 1), equalTo(1)); } - public void testRandom() { + public void testRandomIndicesScaleAdjustement() { Random rnd = new Random(42); for (int i = 0; i < 100_000; i++) { - long index = rnd.nextLong(); + long index = rnd.nextLong() % MAX_INDEX; int maxScale = getMaximumScaleIncrease(index); assertThat(adjustScale(adjustScale(index, maxScale), -maxScale), equalTo(index)); - assertThrows(ArithmeticException.class, () -> Math.multiplyExact(adjustScale(index, maxScale), 2)); + if (index >0) { + assertThat(adjustScale(index, maxScale) *2, greaterThan(MAX_INDEX)); + } else { + assertThat(adjustScale(index, maxScale) *2, lessThan(MIN_INDEX)); + + } } } - public void testRandomComparison() { + public void testRandomBucketBoundaryComparison() { Random rnd = new Random(42); for (int i = 0; i < 100_000; i++) { - long indexA = rnd.nextLong(); - long indexB = rnd.nextLong(); - int scaleA = rnd.nextInt() % 40; - int scaleB = rnd.nextInt() % 40; + long indexA = rnd.nextLong() % MAX_INDEX; + long indexB = rnd.nextLong() % MAX_INDEX; + int scaleA = rnd.nextInt() % MAX_SCALE; + int scaleB = rnd.nextInt() % MAX_SCALE; double lowerBoundA = getLowerBucketBoundary(indexA, scaleA); while (Double.isInfinite(lowerBoundA)) 
{ @@ -77,7 +79,6 @@ public void testRandomComparison() { } if (lowerBoundA != lowerBoundB) { - System.out.println("Comparing " + lowerBoundA + " to " + lowerBoundB); assertThat(Double.compare(lowerBoundA, lowerBoundB), equalTo(compareLowerBoundaries(indexA, scaleA, indexB, scaleB))); } } @@ -98,10 +99,9 @@ public void testSaneBucketBoundaries() { assertThat(getLowerBucketBoundary(0, 42), equalTo(1.0)); assertThat(getLowerBucketBoundary(1, 0), equalTo(2.0)); assertThat(getLowerBucketBoundary(1, -1), equalTo(4.0)); - assertThat(getLowerBucketBoundary(1, -2), equalTo(16.0)); - double limit1 = getLowerBucketBoundary(Long.MAX_VALUE - 1, 56); - double limit2 = getLowerBucketBoundary(Long.MAX_VALUE, 56); + double limit1 = getLowerBucketBoundary(MIN_INDEX, MAX_SCALE); + double limit2 = getLowerBucketBoundary(MIN_INDEX, MAX_SCALE); assertThat(limit1, lessThanOrEqualTo(limit2)); } } diff --git a/libs/exponential-histogram/src/test/java/org/elasticsearch/exponentialhistogram/FixedSizeExponentialHistogramTests.java b/libs/exponential-histogram/src/test/java/org/elasticsearch/exponentialhistogram/FixedSizeExponentialHistogramTests.java index 72e2fe6702731..6ea2376cc7893 100644 --- a/libs/exponential-histogram/src/test/java/org/elasticsearch/exponentialhistogram/FixedSizeExponentialHistogramTests.java +++ b/libs/exponential-histogram/src/test/java/org/elasticsearch/exponentialhistogram/FixedSizeExponentialHistogramTests.java @@ -13,8 +13,16 @@ import java.util.stream.IntStream; +import static org.hamcrest.Matchers.equalTo; + public class FixedSizeExponentialHistogramTests extends ESTestCase { + + public void testDefaultZeroBucketHasZeroThreshold() { + ExponentialHistogram histo = ExponentialHistogramGenerator.createFor(); + assertThat(histo.zeroBucket().zeroThreshold(), equalTo(0.0)); + } + public void testPrintBuckets() { ExponentialHistogram first = ExponentialHistogramGenerator.createFor(0.01234, 42, 56789); ExponentialHistogram second = 
ExponentialHistogramGenerator.createFor(38, 50, 250, 257, 10001.1234); From 92efdcf6c54310cba16a5bcb4776f5dd5d12d581 Mon Sep 17 00:00:00 2001 From: Jonas Kunz Date: Fri, 11 Jul 2025 09:39:27 +0200 Subject: [PATCH 13/32] Fix and clean percentile computation --- .../ExpHistoPercentiles.java | 59 --------- .../ExponentialHistogramQuantile.java | 81 ++++++++++++ .../FixedSizeExponentialHistogramTests.java | 6 +- ...yTests.java => QuantileAccuracyTests.java} | 116 +++++++----------- 4 files changed, 129 insertions(+), 133 deletions(-) delete mode 100644 libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ExpHistoPercentiles.java create mode 100644 libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ExponentialHistogramQuantile.java rename libs/exponential-histogram/src/test/java/org/elasticsearch/exponentialhistogram/{PercentileAccuracyTests.java => QuantileAccuracyTests.java} (69%) diff --git a/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ExpHistoPercentiles.java b/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ExpHistoPercentiles.java deleted file mode 100644 index 24e7fc79db00c..0000000000000 --- a/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ExpHistoPercentiles.java +++ /dev/null @@ -1,59 +0,0 @@ -/* - * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one - * or more contributor license agreements. Licensed under the "Elastic License - * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side - * Public License v 1"; you may not use this file except in compliance with, at - * your election, the "Elastic License 2.0", the "GNU Affero General Public - * License v3.0 only", or the "Server Side Public License, v 1". 
- */ - -package org.elasticsearch.exponentialhistogram; - -public class ExpHistoPercentiles { - - public static double getPercentile(ExponentialHistogram histo, double percentile) { - if (percentile < 0 || percentile > 1) { - throw new IllegalArgumentException("percentile must be in range [0, 1]"); - } - - long zeroCount = histo.zeroBucket().count(); - long negCount = getTotalCount(histo.negativeBuckets()); - long posCount = getTotalCount(histo.positiveBuckets()); - - long totalCount = zeroCount + negCount + posCount; - if (totalCount == 0) { - // Can't compute percentile on empty histogram - return Double.NaN; - } - // TODO: Maybe not round, but interpolate between? - long targetRank = Math.round((totalCount - 1) * percentile); - if (targetRank < negCount) { - return -getBucketMidpointForRank(histo.negativeBuckets(), (negCount - 1) - targetRank); - } else if (targetRank < (negCount + zeroCount)) { - return 0.0; // we are in the zero bucket - } else { - return getBucketMidpointForRank(histo.positiveBuckets(), targetRank - (negCount + zeroCount)); - } - } - - private static double getBucketMidpointForRank(ExponentialHistogram.BucketIterator buckets, long rank) { - long seenCount = 0; - while (buckets.hasNext()) { - seenCount += buckets.peekCount(); - if (rank < seenCount) { - return ExponentialHistogramUtils.getPointOfLeastRelativeError(buckets.peekIndex(), buckets.scale()); - } - buckets.advance(); - } - throw new IllegalStateException("buckets contain in total less elements than the desired rank"); - } - - private static long getTotalCount(ExponentialHistogram.BucketIterator buckets) { - long count = 0; - while (buckets.hasNext()) { - count += buckets.peekCount(); - buckets.advance(); - } - return count; - } -} diff --git a/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ExponentialHistogramQuantile.java b/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ExponentialHistogramQuantile.java new file mode 
100644 index 0000000000000..bc7114bab79b4 --- /dev/null +++ b/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ExponentialHistogramQuantile.java @@ -0,0 +1,81 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the "Elastic License + * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side + * Public License v 1"; you may not use this file except in compliance with, at + * your election, the "Elastic License 2.0", the "GNU Affero General Public + * License v3.0 only", or the "Server Side Public License, v 1". + */ + +package org.elasticsearch.exponentialhistogram; + +public class ExponentialHistogramQuantile { + + /** + * Provides a quantile for the distribution represented by the given histogram. + * + * It returns the value of the element at rank {@code max(0, min( n-1, (quantile * (n+1))-1))}, where rank starts at 0. + * If that rank is fractional, we linearly interpolate between the values of the two neighboring ranks based on the fractional part.
+ * + * @param histo the histogram representing the distribution + * @param quantile the quantile to query, in the range [0,1] + * @return NaN if the histogram is empty, otherwise the quantile + */ + public static double getQuantile(ExponentialHistogram histo, double quantile) { + if (quantile < 0 || quantile > 1) { + throw new IllegalArgumentException("quantile must be in range [0, 1]"); + } + + long zeroCount = histo.zeroBucket().count(); + long negCount = getTotalCount(histo.negativeBuckets()); + long posCount = getTotalCount(histo.positiveBuckets()); + + long totalCount = zeroCount + negCount + posCount; + if (totalCount == 0) { + // Can't compute quantile on empty histogram + return Double.NaN; + } + + double exactRank = Math.max(0, Math.min(totalCount - 1, (totalCount + 1) * quantile - 1)); + long lowerRank = (long) Math.floor(exactRank); + long upperRank = (long) Math.ceil(exactRank); + double upperFactor = exactRank - lowerRank; + + return getElementAtRank(histo, lowerRank, negCount, zeroCount) * ( 1 - upperFactor) + +getElementAtRank(histo, upperRank, negCount, zeroCount) * upperFactor; + } + + private static double getElementAtRank(ExponentialHistogram histo, long rank, long negCount, long zeroCount) { + if (rank < negCount) { + return -getBucketMidpointForRank(histo.negativeBuckets(), (negCount - 1) - rank); + } else if (rank < (negCount + zeroCount)) { + return 0.0; + } else { + return getBucketMidpointForRank(histo.positiveBuckets(), rank - (negCount + zeroCount)); + } + } + + private static double getBucketMidpointForRank(ExponentialHistogram.BucketIterator buckets, long rank) { + long seenCount = 0; + while (buckets.hasNext()) { + seenCount += buckets.peekCount(); + if (rank < seenCount) { + double prev = ExponentialHistogramUtils.getLowerBucketBoundary(buckets.peekIndex(), buckets.scale()); + double next = ExponentialHistogramUtils.getLowerBucketBoundary(buckets.peekIndex()+1, buckets.scale()); + double result = 
ExponentialHistogramUtils.getPointOfLeastRelativeError(buckets.peekIndex(), buckets.scale()); + return result; + } + buckets.advance(); + } + throw new IllegalStateException("buckets contain in total less elements than the desired rank"); + } + + private static long getTotalCount(ExponentialHistogram.BucketIterator buckets) { + long count = 0; + while (buckets.hasNext()) { + count += buckets.peekCount(); + buckets.advance(); + } + return count; + } +} diff --git a/libs/exponential-histogram/src/test/java/org/elasticsearch/exponentialhistogram/FixedSizeExponentialHistogramTests.java b/libs/exponential-histogram/src/test/java/org/elasticsearch/exponentialhistogram/FixedSizeExponentialHistogramTests.java index 6ea2376cc7893..3bb609ef37719 100644 --- a/libs/exponential-histogram/src/test/java/org/elasticsearch/exponentialhistogram/FixedSizeExponentialHistogramTests.java +++ b/libs/exponential-histogram/src/test/java/org/elasticsearch/exponentialhistogram/FixedSizeExponentialHistogramTests.java @@ -38,9 +38,9 @@ public void testPrintBucketsLinearScale() { IntStream.range(-1_000_000, 2_000_000).mapToDouble(Double::valueOf) ); - double smallPerc = ExpHistoPercentiles.getPercentile(result, 0.00001); - double highPerc = ExpHistoPercentiles.getPercentile(result, 0.9999); - double median = ExpHistoPercentiles.getPercentile(result, 0.5); + double smallPerc = ExponentialHistogramQuantile.getQuantile(result, 0.00001); + double highPerc = ExponentialHistogramQuantile.getQuantile(result, 0.9999); + double median = ExponentialHistogramQuantile.getQuantile(result, 0.5); printMidpoints(result); } diff --git a/libs/exponential-histogram/src/test/java/org/elasticsearch/exponentialhistogram/PercentileAccuracyTests.java b/libs/exponential-histogram/src/test/java/org/elasticsearch/exponentialhistogram/QuantileAccuracyTests.java similarity index 69% rename from libs/exponential-histogram/src/test/java/org/elasticsearch/exponentialhistogram/PercentileAccuracyTests.java rename to 
libs/exponential-histogram/src/test/java/org/elasticsearch/exponentialhistogram/QuantileAccuracyTests.java index cf6eba4976ee3..a5d678eec94b5 100644 --- a/libs/exponential-histogram/src/test/java/org/elasticsearch/exponentialhistogram/PercentileAccuracyTests.java +++ b/libs/exponential-histogram/src/test/java/org/elasticsearch/exponentialhistogram/QuantileAccuracyTests.java @@ -23,66 +23,58 @@ import java.util.Arrays; import java.util.Random; +import java.util.stream.DoubleStream; +import java.util.stream.IntStream; import static org.hamcrest.Matchers.closeTo; import static org.hamcrest.Matchers.lessThan; -public class PercentileAccuracyTests extends ESTestCase { +public class QuantileAccuracyTests extends ESTestCase { - public static final double[] PERCENTILES_TO_TEST = { 0, 0.01, 0.1, 0.25, 0.5, 0.75, 0.9, 0.95, 0.99, 1.0 }; + public static final double[] QUANTILES_TO_TEST = { 0, 0.01, 0.1, 0.25, 0.5, 0.75, 0.9, 0.95, 0.99, 1.0 }; + + public void testBasicSmall() { + DoubleStream values = IntStream.range(1,10).mapToDouble(Double::valueOf); + testQuantileAccuracy(values.toArray(), 100); + } - /** - * Test percentile accuracy with uniform distribution - */ public void testUniformDistribution() { - testDistributionPercentileAccuracy(new UniformRealDistribution(new Well19937c(42), 0, 100), 50000, 500); + testDistributionQuantileAccuracy(new UniformRealDistribution(new Well19937c(42), 0, 100), 50000, 500); } - /** - * Test percentile accuracy with normal distribution - */ public void testNormalDistribution() { - testDistributionPercentileAccuracy(new NormalDistribution(new Well19937c(42), 100, 15), 50000, 500); + testDistributionQuantileAccuracy(new NormalDistribution(new Well19937c(42), 100, 15), 50000, 500); } - /** - * Test percentile accuracy with exponential distribution - */ public void testExponentialDistribution() { - testDistributionPercentileAccuracy(new ExponentialDistribution(new Well19937c(42), 10), 50000, 500); + testDistributionQuantileAccuracy(new 
ExponentialDistribution(new Well19937c(42), 10), 50000, 500); } - /** - * Test percentile accuracy with log-normal distribution - */ public void testLogNormalDistribution() { - testDistributionPercentileAccuracy(new LogNormalDistribution(new Well19937c(42), 0, 1), 50000, 500); + testDistributionQuantileAccuracy(new LogNormalDistribution(new Well19937c(42), 0, 1), 50000, 500); } - /** - * Test percentile accuracy with gamma distribution - */ public void testGammaDistribution() { - testDistributionPercentileAccuracy(new GammaDistribution(new Well19937c(42), 2, 5), 50000, 500); + testDistributionQuantileAccuracy(new GammaDistribution(new Well19937c(42), 2, 5), 50000, 500); } - /** - * Test percentile accuracy with beta distribution - */ public void testBetaDistribution() { - testDistributionPercentileAccuracy(new BetaDistribution(new Well19937c(42), 2, 5), 50000, 500); + testDistributionQuantileAccuracy(new BetaDistribution(new Well19937c(42), 2, 5), 50000, 500); } - /** - * Test percentile accuracy with Weibull distribution - */ public void testWeibullDistribution() { - testDistributionPercentileAccuracy(new WeibullDistribution(new Well19937c(42), 2, 5), 50000, 500); + testDistributionQuantileAccuracy(new WeibullDistribution(new Well19937c(42), 2, 5), 50000, 500); + } + + public void testBigJump() { + double[] values = DoubleStream.concat( + IntStream.range(0,18).mapToDouble(Double::valueOf), + DoubleStream.of(1_000_000.0) + ).toArray(); + + testQuantileAccuracy(values, 500); } - /** - * Test how bucket count affects percentile accuracy - */ public void testBucketCountImpact() { RealDistribution distribution = new LogNormalDistribution(new Well19937c(42), 0, 1); int sampleSize = 50000; @@ -91,19 +83,16 @@ public void testBucketCountImpact() { // Test with different bucket counts int[] bucketCounts = { 10, 50, 100, 200, 500 }; for (int bucketCount : bucketCounts) { - double maxError = testPercentileAccuracy(values, bucketCount); + double maxError = 
testQuantileAccuracy(values, bucketCount); logger.info("Bucket count: " + bucketCount + ", Max relative error: " + maxError); } // Verify that more buckets generally means better accuracy - double errorWithFewBuckets = testPercentileAccuracy(values, 20); - double errorWithManyBuckets = testPercentileAccuracy(values, 200); + double errorWithFewBuckets = testQuantileAccuracy(values, 20); + double errorWithManyBuckets = testQuantileAccuracy(values, 200); assertThat("More buckets should improve accuracy", errorWithManyBuckets, lessThan(errorWithFewBuckets)); } - /** - * Test percentile accuracy with mixed positive and negative values - */ public void testMixedSignValues() { Random random = new Random(42); double[] values = new double[10000]; @@ -111,12 +100,9 @@ public void testMixedSignValues() { values[i] = (random.nextDouble() * 200) - 100; // Range from -100 to 100 } - testPercentileAccuracy(values, 100); + testQuantileAccuracy(values, 100); } - /** - * Test percentile accuracy with skewed data - */ public void testSkewedData() { // Create a highly skewed dataset Random random = new Random(42); @@ -131,12 +117,9 @@ public void testSkewedData() { } } - testPercentileAccuracy(values, 100); + testQuantileAccuracy(values, 100); } - /** - * Test percentile accuracy with data containing zeros - */ public void testDataWithZeros() { Random random = new Random(42); double[] values = new double[10000]; @@ -149,21 +132,15 @@ public void testDataWithZeros() { } } - testPercentileAccuracy(values, 100); + testQuantileAccuracy(values, 100); } - /** - * Helper method to test percentile accuracy for a given distribution - */ - private void testDistributionPercentileAccuracy(RealDistribution distribution, int sampleSize, int bucketCount) { + private void testDistributionQuantileAccuracy(RealDistribution distribution, int sampleSize, int bucketCount) { double[] values = generateSamples(distribution, sampleSize); - testPercentileAccuracy(values, bucketCount); + 
testQuantileAccuracy(values, bucketCount); } - /** - * Helper method to generate samples from a distribution - */ - private double[] generateSamples(RealDistribution distribution, int sampleSize) { + private static double[] generateSamples(RealDistribution distribution, int sampleSize) { double[] values = new double[sampleSize]; for (int i = 0; i < sampleSize; i++) { values[i] = distribution.sample(); @@ -171,10 +148,7 @@ private double[] generateSamples(RealDistribution distribution, int sampleSize) return values; } - /** - * Helper method to test percentile accuracy for a given dataset - */ - private double testPercentileAccuracy(double[] values, int bucketCount) { + private double testQuantileAccuracy(double[] values, int bucketCount) { // Create histogram ExponentialHistogram histogram = ExponentialHistogramGenerator.createFor(bucketCount, Arrays.stream(values)); @@ -185,17 +159,17 @@ private double testPercentileAccuracy(double[] values, int bucketCount) { double allowedError = getMaximumRelativeError(values, bucketCount); double maxError = 0; - // Compare histogram percentiles with exact percentiles - for (double p : PERCENTILES_TO_TEST) { + // Compare histogram quantiles with exact quantiles + for (double q : QUANTILES_TO_TEST) { double exactValue; - if (p == 0) { + if (q == 0) { exactValue = Arrays.stream(values).min().getAsDouble(); - } else if (p == 1) { + } else if (q == 1) { exactValue = Arrays.stream(values).max().getAsDouble(); } else { - exactValue = exactPercentile.evaluate(p * 100); + exactValue = exactPercentile.evaluate(q * 100); } - double histoValue = ExpHistoPercentiles.getPercentile(histogram, p); + double histoValue = ExponentialHistogramQuantile.getQuantile(histogram, q); // Skip comparison if exact value is zero to avoid division by zero if (Math.abs(exactValue) < 1e-10) { @@ -207,8 +181,8 @@ private double testPercentileAccuracy(double[] values, int bucketCount) { logger.info( String.format( - "Percentile %.2f: Exact=%.6f, Histogram=%.6f, 
Relative Error=%.8f, Allowed Relative Error=%.8f", - p, + "Quantile %.2f: Exact=%.6f, Histogram=%.6f, Relative Error=%.8f, Allowed Relative Error=%.8f", + q, exactValue, histoValue, relativeError, @@ -217,7 +191,7 @@ private double testPercentileAccuracy(double[] values, int bucketCount) { ); assertThat( - String.format("Percentile %.2f should be accurate within %.6f%% relative error", p, allowedError * 100), + String.format("Quantile %.2f should be accurate within %.6f%% relative error", q, allowedError * 100), histoValue, closeTo(exactValue, Math.abs(exactValue * allowedError)) ); @@ -231,7 +205,7 @@ private double testPercentileAccuracy(double[] values, int bucketCount) { * The error depends on the raw values put into the histogram and the number of buckets allowed. * This is an implementation of the error bound computation proven by Theorem 3 in the UDDSketch paper */ - private double getMaximumRelativeError(double[] values, int bucketCount) { + private static double getMaximumRelativeError(double[] values, int bucketCount) { double smallestAbsNegative = Double.MAX_VALUE; double largestAbsNegative = 0; double smallestPositive = Double.MAX_VALUE; From e6924e948d6e44f1b74fb5b01868c0ae3442e71a Mon Sep 17 00:00:00 2001 From: Jonas Kunz Date: Fri, 11 Jul 2025 12:57:12 +0200 Subject: [PATCH 14/32] Add some tests based on TDigestTest --- .../QuantileAccuracyTests.java | 72 ++++++++++++++++--- 1 file changed, 62 insertions(+), 10 deletions(-) diff --git a/libs/exponential-histogram/src/test/java/org/elasticsearch/exponentialhistogram/QuantileAccuracyTests.java b/libs/exponential-histogram/src/test/java/org/elasticsearch/exponentialhistogram/QuantileAccuracyTests.java index a5d678eec94b5..4be298cf10aaf 100644 --- a/libs/exponential-histogram/src/test/java/org/elasticsearch/exponentialhistogram/QuantileAccuracyTests.java +++ b/libs/exponential-histogram/src/test/java/org/elasticsearch/exponentialhistogram/QuantileAccuracyTests.java @@ -28,16 +28,12 @@ import static 
org.hamcrest.Matchers.closeTo; import static org.hamcrest.Matchers.lessThan; +import static org.hamcrest.Matchers.notANumber; public class QuantileAccuracyTests extends ESTestCase { public static final double[] QUANTILES_TO_TEST = { 0, 0.01, 0.1, 0.25, 0.5, 0.75, 0.9, 0.95, 0.99, 1.0 }; - public void testBasicSmall() { - DoubleStream values = IntStream.range(1,10).mapToDouble(Double::valueOf); - testQuantileAccuracy(values.toArray(), 100); - } - public void testUniformDistribution() { testDistributionQuantileAccuracy(new UniformRealDistribution(new Well19937c(42), 0, 100), 50000, 500); } @@ -66,13 +62,69 @@ public void testWeibullDistribution() { testDistributionQuantileAccuracy(new WeibullDistribution(new Well19937c(42), 2, 5), 50000, 500); } + public void testBasicSmall() { + DoubleStream values = IntStream.range(1, 10).mapToDouble(Double::valueOf); + double maxError = testQuantileAccuracy(values.toArray(), 100); + assertThat(maxError, lessThan(0.000001)); + } + public void testBigJump() { - double[] values = DoubleStream.concat( - IntStream.range(0,18).mapToDouble(Double::valueOf), - DoubleStream.of(1_000_000.0) - ).toArray(); + double[] values = DoubleStream.concat(IntStream.range(0, 18).mapToDouble(Double::valueOf), DoubleStream.of(1_000_000.0)).toArray(); + + double maxError = testQuantileAccuracy(values, 500); + assertThat(maxError, lessThan(0.000001)); + } - testQuantileAccuracy(values, 500); + public void testExplicitSkewedData() { + double[] data = new double[] { + 245, + 246, + 247.249, + 240, + 243, + 248, + 250, + 241, + 244, + 245, + 245, + 247, + 243, + 242, + 241, + 50100, + 51246, + 52247, + 52249, + 51240, + 53243, + 59248, + 59250, + 57241, + 56244, + 55245, + 56245, + 575247, + 58243, + 51242, + 54241 }; + + double maxError = testQuantileAccuracy(data, data.length / 2); + assertThat(maxError, lessThan(0.007)); + } + + public void testEmptyHistogram() { + ExponentialHistogram histo = ExponentialHistogramGenerator.createFor(); + for (double q : 
QUANTILES_TO_TEST) { + assertThat(ExponentialHistogramQuantile.getQuantile(histo, q), notANumber()); + } + } + + public void testSingleValueHistogram() { + ExponentialHistogram histo = ExponentialHistogramGenerator.createFor(42); + for (double q : QUANTILES_TO_TEST) { + assertThat(ExponentialHistogramQuantile.getQuantile(histo, q), closeTo(42, 0.0000001)); + } } public void testBucketCountImpact() { From cab3fdfa61eae0c4b5a3fb56d5a3906113234070 Mon Sep 17 00:00:00 2001 From: Jonas Kunz Date: Mon, 14 Jul 2025 16:21:47 +0200 Subject: [PATCH 15/32] Clean up, bug fixes and javadoc --- gradle/verification-metadata.xml | 5 + libs/exponential-histogram/build.gradle | 1 + .../exponentialhistogram/DownscaleStats.java | 36 ++- .../ExponentialHistogram.java | 102 ++++++- .../ExponentialHistogramGenerator.java | 42 ++- .../ExponentialHistogramMerger.java | 55 ++-- .../ExponentialHistogramQuantile.java | 9 +- .../ExponentialHistogramUtils.java | 156 ----------- .../ExponentialScaleUtils.java | 265 ++++++++++++++++++ .../FixedCapacityExponentialHistogram.java | 207 ++++++++++++++ .../FixedSizeExponentialHistogram.java | 159 ----------- .../MergingBucketIterator.java | 4 +- .../ScaleAdjustingBucketIterator.java | 10 +- .../exponentialhistogram/ZeroBucket.java | 6 +- .../DownscaleStatsTests.java | 2 +- .../ExponentialHistogramGeneratorTests.java | 4 +- .../ExponentialHistogramMergerTests.java | 13 +- .../ExponentialHistogramUtilsTests.java | 107 ------- .../ExponentialScaleUtilsTests.java | 198 +++++++++++++ .../FixedSizeExponentialHistogramTests.java | 72 ----- .../ZeroBucketTests.java} | 10 +- 21 files changed, 887 insertions(+), 576 deletions(-) delete mode 100644 libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ExponentialHistogramUtils.java create mode 100644 libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ExponentialScaleUtils.java create mode 100644 
libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/FixedCapacityExponentialHistogram.java delete mode 100644 libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/FixedSizeExponentialHistogram.java delete mode 100644 libs/exponential-histogram/src/test/java/org/elasticsearch/exponentialhistogram/ExponentialHistogramUtilsTests.java create mode 100644 libs/exponential-histogram/src/test/java/org/elasticsearch/exponentialhistogram/ExponentialScaleUtilsTests.java delete mode 100644 libs/exponential-histogram/src/test/java/org/elasticsearch/exponentialhistogram/FixedSizeExponentialHistogramTests.java rename libs/exponential-histogram/src/{main/java/org/elasticsearch/exponentialhistogram/ExponentialHistogramBuilder.java => test/java/org/elasticsearch/exponentialhistogram/ZeroBucketTests.java} (65%) diff --git a/gradle/verification-metadata.xml b/gradle/verification-metadata.xml index 7e147eff76dbd..3230c8d9396ab 100644 --- a/gradle/verification-metadata.xml +++ b/gradle/verification-metadata.xml @@ -66,6 +66,11 @@ + + + + + diff --git a/libs/exponential-histogram/build.gradle b/libs/exponential-histogram/build.gradle index 87243ed2f6536..7115c2086c890 100644 --- a/libs/exponential-histogram/build.gradle +++ b/libs/exponential-histogram/build.gradle @@ -13,5 +13,6 @@ apply plugin: 'elasticsearch.build' dependencies { testImplementation(project(":test:framework")) + testImplementation('ch.obermuhlner:big-math:2.3.2') testImplementation('org.apache.commons:commons-math3:3.6.1') } diff --git a/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/DownscaleStats.java b/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/DownscaleStats.java index d2748d85fb7f4..cfd210b419be9 100644 --- a/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/DownscaleStats.java +++ 
b/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/DownscaleStats.java @@ -9,18 +9,28 @@ package org.elasticsearch.exponentialhistogram; -public class DownscaleStats { +import java.util.Arrays; - // collapsedCount[i] represents the number of assitional - // collapsed buckets when increasing the scale by (i+1) instead of (i) - int[] collapsedCount = new int[63]; +/** + * Data structure for effectively computing by how much the scale of a histogram needs to be reduced to reach a target bucket count. + * This works by looking at each pair of neighboring buckets and checking at which scale reduction they would collapse to a single bucket. + */ +class DownscaleStats { + + // collapsedBucketCount[i] represents the number of additional + // collapsed buckets when increasing the scale by (i+1) instead of just by (i) + int[] collapsedBucketCount = new int[63]; + + void reset() { + Arrays.fill(collapsedBucketCount, 0); + } void add(long previousBucketIndex, long currentBucketIndex) { if (currentBucketIndex <= previousBucketIndex) { throw new IllegalArgumentException("currentBucketIndex must be bigger than previousBucketIndex"); } /* Below is an efficient variant of the following algorithm: - for (int i=0; i<64; i++) { + for (int i=0; i<63; i++) { if (prevIndex>>(i+1) == currIndex>>(i+1)) { collapsedBucketCount[i]++; break; @@ -35,28 +45,28 @@ void add(long previousBucketIndex, long currentBucketIndex) { return; } int requiredScaleChange = 64 - numEqualLeadingBits; - collapsedCount[requiredScaleChange - 1]++; + collapsedBucketCount[requiredScaleChange - 1]++; } int getCollapsedBucketCountAfterScaleReduction(int reduction) { int totalCollapsed = 0; for (int i = 0; i < reduction; i++) { - totalCollapsed += collapsedCount[i]; + totalCollapsed += collapsedBucketCount[i]; } return totalCollapsed; } - public int getRequiredScaleReductionToReduceBucketCountBy(int desiredReduction) { - if (desiredReduction == 0) { + int 
getRequiredScaleReductionToReduceBucketCountBy(int desiredCollapsedBucketCount) { + if (desiredCollapsedBucketCount == 0) { return 0; } int totalCollapsed = 0; - for (int i = 0; i < collapsedCount.length; i++) { - totalCollapsed += collapsedCount[i]; - if (totalCollapsed >= desiredReduction) { + for (int i = 0; i < collapsedBucketCount.length; i++) { + totalCollapsed += collapsedBucketCount[i]; + if (totalCollapsed >= desiredCollapsedBucketCount) { return i + 1; } } - throw new IllegalArgumentException("it is not possible to reduce the bucket count by " + desiredReduction); + throw new IllegalArgumentException("it is not possible to reduce the bucket count by " + desiredCollapsedBucketCount); } } diff --git a/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ExponentialHistogram.java b/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ExponentialHistogram.java index f1a1c867d2177..cd77873e2231d 100644 --- a/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ExponentialHistogram.java +++ b/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ExponentialHistogram.java @@ -9,49 +9,133 @@ package org.elasticsearch.exponentialhistogram; +import java.util.OptionalLong; + +/** + * Interface for implementations of exponential histograms adhering to the opentelemetry definition. + * This interface explicitly allows for sparse implementation: It does not offer to directly access buckets by index, instead it + * is only possible to iterate over the buckets.
+ * The most important properties are: + *

+ * + *
+ * In addition, in all algorithms we make a central assumption about the distribution of samples within each bucket: + * We assume they all lie on the single point of least error relative to the bucket boundaries (see {@link ExponentialScaleUtils#getPointOfLeastRelativeError(long, int)}). + */ public interface ExponentialHistogram { - // scale of 38 is the largest scale where at the borders we don't run into problems due to floating point precision - // theoretically, a MAX_SCALE of 51 would work and would still cover the entire range of double values - // if we want to use something larger, we'll have to rework the math of converting from double to indices and back + //TODO: support min/max/sum/count storage and merging + //TODO: Add special positive and negative infinity buckets to allow representation of explicit bucket histograms with open boundaries + + // scale of 38 is the largest scale where at the borders we don't run into problems due to floating point precision when computing + // indices for double values + // Theoretically, a MAX_SCALE of 51 would work and would still cover the entire range of double values + // For that to work, we'll have to rework the math of converting from double to indices and back // One option would be to use "Quadruple": https://github.com/m-vokhm/Quadruple int MAX_SCALE = 38; + // Add this scale all double values already fall into a single bucket + int MIN_SCALE = -11; + // Only use 62 bit at max to allow to compute the difference between the smallest and largest index without causing overflow // Also the extra bit gives us room for some tricks for compact storage int MAX_INDEX_BITS = 62; long MAX_INDEX = (1L << MAX_INDEX_BITS) - 1; long MIN_INDEX = -MAX_INDEX; + /** + * The scale of the histogram. Higher scales result in higher accuracy, but potentially higher bucket count. + * Must be less than or equal to {@link #MAX_SCALE} and greater than or equal to {@link #MIN_SCALE}. 
+ */ int scale(); + /** + * @return the {@link ZeroBucket} representing the number of zero (or close to zero) values and its threshold + */ ZeroBucket zeroBucket(); - BucketIterator positiveBuckets(); + /** + * @return a {@link BucketIterator} for the populated, positive buckets of this histogram. {@link BucketIterator#scale()} of the return value must return the same value as {@link #scale()}. + */ + CopyableBucketIterator positiveBuckets(); - BucketIterator negativeBuckets(); + /** + * @return a {@link BucketIterator} for the populated, negative buckets of this histogram. {@link BucketIterator#scale()} of the return value must return the same value as {@link #scale()}. + */ + CopyableBucketIterator negativeBuckets(); /** - * Returns the highest populated bucket index, taking both negative and positive buckets into account; - * If there are no buckets populated, Long.MIN_VALUE shall be returned. + * Returns the highest populated bucket index, taking both negative and positive buckets into account. + * If there are neither positive nor negative buckets populated, an empty optional is returned. */ - long maximumBucketIndex(); + OptionalLong maximumBucketIndex(); /** - * Iterator over the non-empty buckets. + * Iterator over non-empty buckets of the histogram. Can represent either the positive or negative histogram range. + * */ interface BucketIterator { + /** + * Checks if there are any buckets remaining to be visited by this iterator. + * If the end has been reached, it is illegal to call {@link #peekCount()}, {@link #peekIndex()} or {@link #advance()}. + * + * @return false, if the end has been reached, true otherwise. + */ boolean hasNext(); + /** + * The number of items in the bucket this iterator currently points at. Does not advance the iterator by itself and therefore can be called repeatedly to return the same value. + * Must not be called if {@link #hasNext()} returns false. 
+ * + * @return the number of items in the bucket, always greater than zero + */ long peekCount(); + /** + * The index of the bucket this iterator currently points at. Does not advance the iterator by itself and therefore can be called repeatedly to return the same value. + * Must not be called if {@link #hasNext()} returns false. + * + * @return the index of the bucket, guaranteed to be in the range [{@link #MIN_INDEX}, {@link #MAX_INDEX}] + */ long peekIndex(); + /** + * Moves the iterator to the next, non-empty bucket. + * If {@link #hasNext()} is true after {@link #advance()}, {@link #peekIndex()} is guaranteed to return a value bigger than prior to the {@link #advance()} call. + */ void advance(); + /** + * Provides the scale that can be used to convert indices returned by {@link #peekIndex()} to the bucket boundaries, + * e.g. via {@link ExponentialScaleUtils#getLowerBucketBoundary(long, int)}. + * + * @return the scale, which is guaranteed to be constant over the lifetime of this iterator. + */ int scale(); BucketIterator copy(); } + /** + * A {@link BucketIterator} which can be copied. + */ + interface CopyableBucketIterator extends BucketIterator { + + /** + * Provides a bucket iterator pointing at the same bucket of the same range of buckets as this iterator. + * Calling {@link #advance()} on the copied iterator does not affect this and vice-versa. 
+ */ + CopyableBucketIterator copy(); + } + } diff --git a/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ExponentialHistogramGenerator.java b/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ExponentialHistogramGenerator.java index 7699ea0cfb939..578e2267db289 100644 --- a/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ExponentialHistogramGenerator.java +++ b/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ExponentialHistogramGenerator.java @@ -12,28 +12,39 @@ import java.util.Arrays; import java.util.stream.DoubleStream; -import static org.elasticsearch.exponentialhistogram.ExponentialHistogramUtils.computeIndex; +import static org.elasticsearch.exponentialhistogram.ExponentialScaleUtils.computeIndex; /** - * Class for generating a histogram from raw values. + * Class for accumulating raw values into an {@link ExponentialHistogram} with a given maximum bucket count. 
+ * + * If the number of values is less than or equal the bucket capacity, the resulting histogram is guaranteed + * to represent the exact raw values with a relative error less than 2^(2^-MAX_SCALE) - 1 */ public class ExponentialHistogramGenerator { + // Merging individual values into a histogram would way to slow with our sparse, array-backed histogram representation + // Therefore for a bucket capacity of c, we first buffer c raw values to be inserted + // we then turn those into an "exact" histogram, which in turn we merge with our actual result accumulator + // This yields an amortized runtime of O( log(c) ) private final double[] rawValueBuffer; int valueCount; private final ExponentialHistogramMerger resultMerger; - private final FixedSizeExponentialHistogram valueBuffer; + private final FixedCapacityExponentialHistogram valueBuffer; private boolean isFinished = false; - public ExponentialHistogramGenerator(int numBuckets) { - rawValueBuffer = new double[numBuckets]; + public ExponentialHistogramGenerator(int maxBucketCount) { + rawValueBuffer = new double[maxBucketCount]; valueCount = 0; - valueBuffer = new FixedSizeExponentialHistogram(numBuckets); - resultMerger = new ExponentialHistogramMerger(numBuckets); + valueBuffer = new FixedCapacityExponentialHistogram(maxBucketCount); + resultMerger = new ExponentialHistogramMerger(maxBucketCount); } + /** + * Add the given value to the histogram. + * Must not be calles after {@link #get()} has been called. + */ public void add(double value) { if (isFinished) { throw new IllegalStateException("get() has already been called"); @@ -45,19 +56,28 @@ public void add(double value) { valueCount++; } + /** + * @return the histogram representing the distribution of all accumulated values. 
+ */ public ExponentialHistogram get() { - if (isFinished) { - throw new IllegalStateException("get() has already been called"); - } isFinished = true; mergeValuesToHistogram(); return resultMerger.get(); } + /** + * Create a histogram representing the distribution of the given values. + * The histogram will have a bucket count of at most the length of the provided array + * and will have a relative error less than 2^(2^-MAX_SCALE) - 1. + */ public static ExponentialHistogram createFor(double... values) { return createFor(values.length, Arrays.stream(values)); } - + /** + * Create a histogram representing the distribution of the given values with at most the given number of buckets. + * If the given bucketCount is greater or equal to the number of values, the resulting histogram will have a + * relative error of less than 2^(2^-MAX_SCALE) - 1. + */ public static ExponentialHistogram createFor(int bucketCount, DoubleStream values) { ExponentialHistogramGenerator generator = new ExponentialHistogramGenerator(bucketCount); values.forEach(generator::add); diff --git a/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ExponentialHistogramMerger.java b/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ExponentialHistogramMerger.java index 0bbd3c3887772..929cfe0e18e10 100644 --- a/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ExponentialHistogramMerger.java +++ b/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ExponentialHistogramMerger.java @@ -10,23 +10,34 @@ package org.elasticsearch.exponentialhistogram; import java.util.Arrays; +import java.util.OptionalLong; import java.util.stream.Stream; -import static org.elasticsearch.exponentialhistogram.ExponentialHistogramUtils.getMaximumScaleIncrease; +import static org.elasticsearch.exponentialhistogram.ExponentialScaleUtils.getMaximumScaleIncrease; +/** + * Allows accumulating multiple {@link 
ExponentialHistogram} into a single one while keeping the bucket count in the result below a given limit. + */ public class ExponentialHistogramMerger { - FixedSizeExponentialHistogram result; - FixedSizeExponentialHistogram buffer; + // Our algorithm is not in-place, therefore we use two histograms and ping-pong between them + private FixedCapacityExponentialHistogram result; + private FixedCapacityExponentialHistogram buffer; + + private final DownscaleStats downscaleStats; private boolean isFinished; - public ExponentialHistogramMerger(int resultBucketCount) { - result = new FixedSizeExponentialHistogram(resultBucketCount); - buffer = new FixedSizeExponentialHistogram(resultBucketCount); + /** + * @param bucketLimit the maximum number of buckets the result histogram is allowed to have + */ + public ExponentialHistogramMerger(int bucketLimit) { + downscaleStats = new DownscaleStats(); + result = new FixedCapacityExponentialHistogram(bucketLimit); + buffer = new FixedCapacityExponentialHistogram(bucketLimit); } - // Only inteded for testing, using this in production means an unnecessary reduction of precision + // Only intended for testing, using this in production means an unnecessary reduction of precision ExponentialHistogramMerger(int resultBucketCount, int minScale) { this(resultBucketCount); result.resetBuckets(minScale); @@ -38,7 +49,7 @@ public void add(ExponentialHistogram toAdd) { throw new IllegalStateException("get() has already been called"); } merge(buffer, result, toAdd); - FixedSizeExponentialHistogram temp = result; + FixedCapacityExponentialHistogram temp = result; result = buffer; buffer = temp; } @@ -61,8 +72,12 @@ public static ExponentialHistogram merge(int bucketCount, Stream b.scale()) { - boolean isNonEmpty = false; if (posBucketsB.hasNext()) { - isNonEmpty = true; - targetScale = Math.min(targetScale, b.scale() + getMaximumScaleIncrease(posBucketsB.peekIndex())); + long smallestIndex = posBucketsB.peekIndex(); + targetScale = 
Math.min(targetScale, b.scale() + getMaximumScaleIncrease(smallestIndex)); } if (negBucketsB.hasNext()) { - isNonEmpty = true; - targetScale = Math.min(targetScale, b.scale() + getMaximumScaleIncrease(negBucketsB.peekIndex())); + long smallestIndex = negBucketsB.peekIndex(); + targetScale = Math.min(targetScale, b.scale() + getMaximumScaleIncrease(smallestIndex)); } - if (isNonEmpty) { - targetScale = Math.min(targetScale, b.scale() + getMaximumScaleIncrease(b.maximumBucketIndex())); + OptionalLong maxIndex = b.maximumBucketIndex(); + if (maxIndex.isPresent()) { + targetScale = Math.min(targetScale, b.scale() + getMaximumScaleIncrease(maxIndex.getAsLong())); } } @@ -100,7 +115,7 @@ private static void merge(ExponentialHistogramBuilder output, ExponentialHistogr MergingBucketIterator negativeMerged = new MergingBucketIterator(negBucketsA.copy(), negBucketsB.copy(), targetScale); output.resetBuckets(targetScale); - DownscaleStats downscaleStats = new DownscaleStats(); + downscaleStats.reset(); int overflowCount = putBuckets(output, negativeMerged, false, downscaleStats); overflowCount += putBuckets(output, positiveMerged, true, downscaleStats); @@ -121,7 +136,7 @@ private static void merge(ExponentialHistogramBuilder output, ExponentialHistogr } private static int putBuckets( - ExponentialHistogramBuilder output, + FixedCapacityExponentialHistogram output, ExponentialHistogram.BucketIterator buckets, boolean isPositive, DownscaleStats downscaleStats diff --git a/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ExponentialHistogramQuantile.java b/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ExponentialHistogramQuantile.java index bc7114bab79b4..711d2dfc191a9 100644 --- a/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ExponentialHistogramQuantile.java +++ b/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ExponentialHistogramQuantile.java @@ 
-9,6 +9,9 @@ package org.elasticsearch.exponentialhistogram; +/** + * Quantile estimation for {@link ExponentialHistogram}s. + */ public class ExponentialHistogramQuantile { /** @@ -41,6 +44,7 @@ public static double getQuantile(ExponentialHistogram histo, double quantile) { long upperRank = (long) Math.ceil(exactRank); double upperFactor = exactRank - lowerRank; + // TODO: if we want more performance here, we could iterate the buckets once instead of twice return getElementAtRank(histo, lowerRank, negCount, zeroCount) * ( 1 - upperFactor) +getElementAtRank(histo, upperRank, negCount, zeroCount) * upperFactor; } @@ -60,10 +64,7 @@ private static double getBucketMidpointForRank(ExponentialHistogram.BucketIterat while (buckets.hasNext()) { seenCount += buckets.peekCount(); if (rank < seenCount) { - double prev = ExponentialHistogramUtils.getLowerBucketBoundary(buckets.peekIndex(), buckets.scale()); - double next = ExponentialHistogramUtils.getLowerBucketBoundary(buckets.peekIndex()+1, buckets.scale()); - double result = ExponentialHistogramUtils.getPointOfLeastRelativeError(buckets.peekIndex(), buckets.scale()); - return result; + return ExponentialScaleUtils.getPointOfLeastRelativeError(buckets.peekIndex(), buckets.scale()); } buckets.advance(); } diff --git a/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ExponentialHistogramUtils.java b/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ExponentialHistogramUtils.java deleted file mode 100644 index 57a9968a5f444..0000000000000 --- a/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ExponentialHistogramUtils.java +++ /dev/null @@ -1,156 +0,0 @@ -/* - * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one - * or more contributor license agreements. 
Licensed under the "Elastic License - * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side - * Public License v 1"; you may not use this file except in compliance with, at - * your election, the "Elastic License 2.0", the "GNU Affero General Public - * License v3.0 only", or the "Server Side Public License, v 1". - */ - -package org.elasticsearch.exponentialhistogram; - -import static org.elasticsearch.exponentialhistogram.ExponentialHistogram.MAX_INDEX; -import static org.elasticsearch.exponentialhistogram.ExponentialHistogram.MAX_INDEX_BITS; -import static org.elasticsearch.exponentialhistogram.ExponentialHistogram.MIN_INDEX; - -public class ExponentialHistogramUtils { - - /** Bit mask used to isolate exponent of IEEE 754 double precision number. */ - private static final long EXPONENT_BIT_MASK = 0x7FF0000000000000L; - - /** Bit mask used to isolate the significand of IEEE 754 double precision number. */ - private static final long SIGNIFICAND_BIT_MASK = 0xFFFFFFFFFFFFFL; - - /** Bias used in representing the exponent of IEEE 754 double precision number. */ - private static final int EXPONENT_BIAS = 1023; - - /** - * The number of bits used to represent the significand of IEEE 754 double precision number, - * excluding the implicit bit. - */ - private static final int SIGNIFICAND_WIDTH = 52; - - /** The number of bits used to represent the exponent of IEEE 754 double precision number. 
*/ - private static final int EXPONENT_WIDTH = 11; - - private static final double LN_2 = Math.log(2); - private static final double LOG_BASE2_E = 1D / LN_2; - - // Magic number, computed via log(4/3)/log(2^(2^-64)), but exact - private static final long SCALE_UP_64_OFFSET = 7656090530189244512L; - - static long adjustScale(long index, int scaleAdjustment) { - if (scaleAdjustment <= 0) { - return index >> -scaleAdjustment; - } else { - // ((index << 64) + SCALE_UP_64_OFFSET)) >> (64-scaleAdjustment) - // = index << scaleAdjustment + SCALE_UP_64_OFFSET >> (64-scaleAdjustment) - return (index << scaleAdjustment) + (SCALE_UP_64_OFFSET >> (64 - scaleAdjustment)); - } - } - - /** - * Equivalent to mathematically correct comparison of the lower bucket boundaries of the given buckets - */ - public static int compareLowerBoundaries(long idxA, int scaleA, long idxB, int scaleB) { - if (scaleA > scaleB) { - return -compareLowerBoundaries(idxB, scaleB, idxA, scaleA); - } - // scaleA <= scaleB - int shifts = scaleB - scaleA; - int maxScaleAdjustment = getMaximumScaleIncrease(idxA); - if (maxScaleAdjustment < shifts) { - // we would overflow if we adjust A to the scale of B - // so if A is negative, scaling would produce a number less than Long.MIN_VALUE, therefore it is definitely smaller than B - // if A is positive, scaling would produce a number bigger than Long.MAX_VALUE, therefore it is definitely bigger than B - // if A is zero => shifting and therefore scale adjustment would not have any effect - if (idxA == 0) { - return Long.compare(0, idxB); - } else { - return idxA < 0 ? -1 : +1; - } - } else { - long adjustedIdxA = idxA << shifts; - return Long.compare(adjustedIdxA, idxB); - } - } - - /** - * Returns the maximum permissible scale-increase which does not cause an overflow - * of the index. 
- */ - public static int getMaximumScaleIncrease(long index) { - if (index < MIN_INDEX || index > MAX_INDEX) { - throw new IllegalArgumentException("index must be in range ["+MIN_INDEX+".."+MAX_INDEX+"]"); - } - if (index < 0) { - index = ~index; - } - return Long.numberOfLeadingZeros(index) - (64 - MAX_INDEX_BITS); - } - - public static double getUpperBucketBoundary(long index, int scale) { - return getLowerBucketBoundary(index + 1, scale); - } - - public static double getLowerBucketBoundary(long index, int scale) { - // TODO: handle numeric limits, implement by splitting the index into two 32 bit integers - double inverseFactor = Math.scalb(LN_2, -scale); - return Math.exp(inverseFactor * index); - } - - public static double getPointOfLeastRelativeError(long bucketIndex, int scale) { - double inverseFactor = Math.scalb(LN_2, -scale); - return Math.exp(inverseFactor * (bucketIndex + 1/3.0)); - } - - static long computeIndex(double value, int scale) { - double absValue = Math.abs(value); - // For positive scales, compute the index by logarithm, which is simpler but may be - // inaccurate near bucket boundaries - if (scale > 0) { - return getIndexByLogarithm(absValue, scale); - } - // For scale zero, compute the exact index by extracting the exponent - if (scale == 0) { - return mapToIndexScaleZero(absValue); - } - // For negative scales, compute the exact index by extracting the exponent and shifting it to - // the right by -scale - return mapToIndexScaleZero(absValue) >> -scale; - } - - /** - * Compute the bucket index using a logarithm based approach. - * - * @see All - * Scales: Use the Logarithm Function - */ - private static long getIndexByLogarithm(double value, int scale) { - double scaleFactor = Math.scalb(LOG_BASE2_E, scale); - return (long) Math.ceil(Math.scalb(Math.log(value) * LOG_BASE2_E, scale)) - 1; - } - - /** - * Compute the exact bucket index for scale zero by extracting the exponent. 
- * - * @see Scale - * Zero: Extract the Exponent - */ - private static long mapToIndexScaleZero(double value) { - long rawBits = Double.doubleToLongBits(value); - long rawExponent = (rawBits & EXPONENT_BIT_MASK) >> SIGNIFICAND_WIDTH; - long rawSignificand = rawBits & SIGNIFICAND_BIT_MASK; - if (rawExponent == 0) { - rawExponent -= Long.numberOfLeadingZeros(rawSignificand - 1) - EXPONENT_WIDTH - 1; - } - int ieeeExponent = (int) (rawExponent - EXPONENT_BIAS); - if (rawSignificand == 0) { - return ieeeExponent - 1; - } - return ieeeExponent; - } - -} diff --git a/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ExponentialScaleUtils.java b/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ExponentialScaleUtils.java new file mode 100644 index 0000000000000..efef70d513a7f --- /dev/null +++ b/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ExponentialScaleUtils.java @@ -0,0 +1,265 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the "Elastic License + * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side + * Public License v 1"; you may not use this file except in compliance with, at + * your election, the "Elastic License 2.0", the "GNU Affero General Public + * License v3.0 only", or the "Server Side Public License, v 1". 
+ */ + +package org.elasticsearch.exponentialhistogram; + +import static org.elasticsearch.exponentialhistogram.ExponentialHistogram.MAX_INDEX; +import static org.elasticsearch.exponentialhistogram.ExponentialHistogram.MAX_INDEX_BITS; +import static org.elasticsearch.exponentialhistogram.ExponentialHistogram.MAX_SCALE; +import static org.elasticsearch.exponentialhistogram.ExponentialHistogram.MIN_INDEX; +import static org.elasticsearch.exponentialhistogram.ExponentialHistogram.MIN_SCALE; + +/** + * Utils for working with indices and scales of exponential bucket histograms. + */ +public class ExponentialScaleUtils { + + private static final double LN_2 = Math.log(2); + + /** + * Only visible for testing, the test ensures that this table is up-to-date. + *
+ * For each scale from {@link ExponentialHistogram#MIN_SCALE} to {@link ExponentialHistogram#MAX_SCALE} + * the table contains a pre-computed constant for performing up-scaling of bucket indices. + * The constant is computed using the following formula: + * (1 + 2^scale * ( 1 - log2(1 + 2^(2^-scale)))) + */ + static final double[] SCALE_UP_CONSTANT_TABLE = new double[]{ + 4.8828125E-4, + 9.765625E-4, + 0.001953125, + 0.00390625, + 0.0078125, + 0.015625, + 0.03124999998950301, + 0.06249862414928998, + 0.12429693135076524, + 0.22813428968741514, + 0.33903595255631885, + 0.4150374992788438, + 0.45689339367277604, + 0.47836619809201575, + 0.4891729613112115, + 0.49458521106164327, + 0.497292446757125, + 0.4986462035295225, + 0.4993230992835585, + 0.4996615493316266, + 0.49983077462704417, + 0.49991538730867596, + 0.4999576936537322, + 0.4999788468267904, + 0.4999894234133857, + 0.4999947117066917, + 0.4999973558533457, + 0.49999867792667285, + 0.4999993389633364, + 0.4999996694816682, + 0.4999998347408341, + 0.49999991737041705, + 0.4999999586852085, + 0.49999997934260426, + 0.49999998967130216, + 0.49999999483565105, + 0.4999999974178255, + 0.49999999870891276, + 0.4999999993544564, + 0.4999999996772282, + 0.4999999998386141, + 0.49999999991930705, + 0.49999999995965355, + 0.49999999997982675, + 0.4999999999899134, + 0.4999999999949567, + 0.49999999999747835, + 0.4999999999987392, + 0.49999999999936956, + 0.4999999999996848 + }; + + /** + * Computes the new index for a bucket when adjusting the scale of the histogram by the given amount. + * Note that this method does not only support down-scaling (=reducing the scale), but also upscaling. + * When scaling up, it will provide the bucket containing the point of least error of the original bucket. 
+ * + * @param index the current bucket index to be upscaled + * @param currentScale the current scale + * @param scaleAdjustment the adjustment to make, the new scale will be currentScale + scaleAdjustment + * @return the index of the bucket in the new scale + */ + static long adjustScale(long index, int currentScale, int scaleAdjustment) { + if (scaleAdjustment <= 0) { + return index >> -scaleAdjustment; + } else { + // When scaling up, we want to return the bucket containing the point of least relative error. + // This bucket index can be computed as (index << adjustment) + offset + // Hereby offset is a constant which does not depend on the index, but only on the scale and adjustment + // The mathematically correct formula for offset is as follows: + // 2^adjustment * (1 + 2^currentScale * ( 1 - log2(1 + 2^(2^-scale)))) + // This is hard to compute in double precision, as it causes rounding errors, also it is quite expensive + // Therefore we precompute (1 + 2^currentScale * ( 1 - log2(1 + 2^(2^-scale)))) and store it + // in SCALE_UP_CONSTANT_TABLE for each scale + double offset = Math.scalb(SCALE_UP_CONSTANT_TABLE[currentScale - MIN_SCALE], scaleAdjustment); + return (index << scaleAdjustment) + (long) Math.floor(offset); + } + } + + + /** + * Equivalent to mathematically correct comparison of the lower bucket boundaries of the given buckets + */ + public static int compareLowerBoundaries(long idxA, int scaleA, long idxB, int scaleB) { + if (scaleA > scaleB) { + return -compareLowerBoundaries(idxB, scaleB, idxA, scaleA); + } + // scaleA <= scaleB + int shifts = scaleB - scaleA; + int maxScaleAdjustment = getMaximumScaleIncrease(idxA); + if (maxScaleAdjustment < shifts) { + // we would overflow if we adjust A to the scale of B + // so if A is negative, scaling would produce a number less than Long.MIN_VALUE, therefore it is definitely smaller than B + // if A is positive, scaling would produce a number bigger than Long.MAX_VALUE, therefore it is definitely 
bigger than B + // if A is zero => shifting and therefore scale adjustment would not have any effect + if (idxA == 0) { + return Long.compare(0, idxB); + } else { + return idxA < 0 ? -1 : +1; + } + } else { + long adjustedIdxA = idxA << shifts; + return Long.compare(adjustedIdxA, idxB); + } + } + + /** + * Returns the maximum permissible scale-increase which does not cause an overflow + * of the index. + */ + public static int getMaximumScaleIncrease(long index) { + if (index < MIN_INDEX || index > MAX_INDEX) { + throw new IllegalArgumentException("index must be in range [" + MIN_INDEX + ".." + MAX_INDEX + "]"); + } + if (index < 0) { + index = ~index; + } + return Long.numberOfLeadingZeros(index) - (64 - MAX_INDEX_BITS); + } + + public static double getUpperBucketBoundary(long index, int scale) { + return getLowerBucketBoundary(index + 1, scale); + } + + public static double getLowerBucketBoundary(long index, int scale) { + double inverseFactor = Math.scalb(LN_2, -scale); + return Math.exp(inverseFactor * index); + } + + /** + * For a bucket with the given index, computes the point x in the bucket so that + * (x - l) / l equals (u - x) / u where l is the lower bucket boundary and where u + * is the upper bucket boundary. + *
+ * In other words we select the point in the bucket which is guaranteed to have the least relative error towards any point in the bucket. + * + * @param bucketIndex the bucket index + * @param scale the scale of the histogram + * @return the point of least relative error + */ + public static double getPointOfLeastRelativeError(long bucketIndex, int scale) { + double upperBound = getUpperBucketBoundary(bucketIndex, scale); + double histogramBase = Math.pow(2, Math.scalb(1, -scale)); + return 2 / (histogramBase + 1) * upperBound; + } + + /** + * Provides the index of the bucket of the exponential histogram with the given scale + * containing the provided value. + */ + public static long computeIndex(double value, int scale) { + return Indexing.computeIndex(value, scale); + } + + /** + * The code in this class has been copied and slightly adapted from the + * OpenTelemetry Base2ExponentialHistogramIndexer implementation + * licensed under Apache License 2.0. + */ + private static class Indexing { + + /** Bit mask used to isolate exponent of IEEE 754 double precision number. */ + private static final long EXPONENT_BIT_MASK = 0x7FF0000000000000L; + + /** Bit mask used to isolate the significand of IEEE 754 double precision number. */ + private static final long SIGNIFICAND_BIT_MASK = 0xFFFFFFFFFFFFFL; + + /** Bias used in representing the exponent of IEEE 754 double precision number. */ + private static final int EXPONENT_BIAS = 1023; + + /** + * The number of bits used to represent the significand of IEEE 754 double precision number, + * excluding the implicit bit. + */ + private static final int SIGNIFICAND_WIDTH = 52; + + /** The number of bits used to represent the exponent of IEEE 754 double precision number. 
*/ + private static final int EXPONENT_WIDTH = 11; + + private static final double LOG_BASE2_E = 1D / LN_2; + + static long computeIndex(double value, int scale) { + double absValue = Math.abs(value); + // For positive scales, compute the index by logarithm, which is simpler but may be + // inaccurate near bucket boundaries + if (scale > 0) { + return getIndexByLogarithm(absValue, scale); + } + // For scale zero, compute the exact index by extracting the exponent + if (scale == 0) { + return mapToIndexScaleZero(absValue); + } + // For negative scales, compute the exact index by extracting the exponent and shifting it to + // the right by -scale + return mapToIndexScaleZero(absValue) >> -scale; + } + + /** + * Compute the bucket index using a logarithm based approach. + * + * @see All + * Scales: Use the Logarithm Function + */ + private static long getIndexByLogarithm(double value, int scale) { + double scaleFactor = Math.scalb(LOG_BASE2_E, scale); + return (long) Math.ceil(Math.scalb(Math.log(value) * LOG_BASE2_E, scale)) - 1; + } + + /** + * Compute the exact bucket index for scale zero by extracting the exponent. 
+ * + * @see Scale + * Zero: Extract the Exponent + */ + private static long mapToIndexScaleZero(double value) { + long rawBits = Double.doubleToLongBits(value); + long rawExponent = (rawBits & EXPONENT_BIT_MASK) >> SIGNIFICAND_WIDTH; + long rawSignificand = rawBits & SIGNIFICAND_BIT_MASK; + if (rawExponent == 0) { + rawExponent -= Long.numberOfLeadingZeros(rawSignificand - 1) - EXPONENT_WIDTH - 1; + } + int ieeeExponent = (int) (rawExponent - EXPONENT_BIAS); + if (rawSignificand == 0) { + return ieeeExponent - 1; + } + return ieeeExponent; + } + } + +} diff --git a/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/FixedCapacityExponentialHistogram.java b/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/FixedCapacityExponentialHistogram.java new file mode 100644 index 0000000000000..3cde0f299a59a --- /dev/null +++ b/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/FixedCapacityExponentialHistogram.java @@ -0,0 +1,207 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the "Elastic License + * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side + * Public License v 1"; you may not use this file except in compliance with, at + * your election, the "Elastic License 2.0", the "GNU Affero General Public + * License v3.0 only", or the "Server Side Public License, v 1". + */ + +package org.elasticsearch.exponentialhistogram; + +import java.util.OptionalLong; + +/** + * Implementation of a mutable {@link ExponentialHistogram} with a sparse, array-backed representation. + *
+ * Consumers must ensure that if the histogram is mutated, all previously acquired @{@link org.elasticsearch.exponentialhistogram.ExponentialHistogram.BucketIterator} + * must not be used anymore. + */ +public final class FixedCapacityExponentialHistogram implements ExponentialHistogram { + + // TODO: maybe switch to BigArrays? + + // These arrays represent both the positive and the negative buckets + // They store first all negative buckets in ascending index order, followed by the positive buckets in ascending index order + private final long[] bucketIndices; + private final long[] bucketCounts; + + private int negativeBucketCount; + private int positiveBucketCount; + private int bucketScale; + + private ZeroBucket zeroBucket; + + /** + * Creates an empty histogram with the given capacity and a {@link ZeroBucket#minimalEmpty()} zero bucket. + * The scale is set to the maximum possible precisions ({@link #MAX_SCALE}). + * + * @param bucketCapacity the maximum number of positive and negative buckets this histogram can hold in total. + */ + public FixedCapacityExponentialHistogram(int bucketCapacity) { + bucketIndices = new long[bucketCapacity]; + bucketCounts = new long[bucketCapacity]; + reset(); + } + + /** + * Resets this histogram so it has the same state as a newly constructed one with the same capacity. + */ + public void reset() { + setZeroBucket(ZeroBucket.minimalEmpty()); + resetBuckets(MAX_SCALE); + } + + /** + * Removes all positive and negative buckets of this histogram. Sets the scale to the given value. + */ + public void resetBuckets(int scale) { + if (scale > MAX_SCALE || scale < MIN_SCALE) { + throw new IllegalArgumentException("scale must be in range ["+MIN_SCALE+".."+MAX_SCALE+"]"); + } + negativeBucketCount = 0; + positiveBucketCount = 0; + bucketScale = scale; + } + + @Override + public ZeroBucket zeroBucket() { + return zeroBucket; + } + + /** + * Replaces the zero bucket of this histogram with the given one. 
+ * Callers must ensure that the given @{@link ZeroBucket} does not overlap any of the positive or negative buckets of this histogram. + */ + public void setZeroBucket(ZeroBucket zeroBucket) { + this.zeroBucket = zeroBucket; + } + + /** + * Attempts to add a bucket to the range of positive or negative buckets of this histogram. + *
+ * Callers have to adhere to the following rules: + * + * + * If any of the rules above are violated, this call fails with an exception. + * In contrast if the bucket cannot be added because the maximum capacity has been reached, the call will not modify the state + * of this histogram and return false. + * + * @param index the index of the bucket to add + * @param count the count to associate with the given bucket + * @param isPositive true, if the bucket to add belongs to the positive range, false if it belongs to the negative range + * @return true if the bucket was added, false if it could not be added due to insufficient capacity + */ + public boolean tryAddBucket(long index, long count, boolean isPositive) { + if (index < MIN_INDEX || index > MAX_INDEX) { + throw new IllegalArgumentException("index must be in range [" + MIN_INDEX + ".." + MAX_INDEX + "]"); + } + if (isPositive == false && positiveBucketCount > 0) { + throw new IllegalArgumentException("Cannot add negative buckets after a positive bucket was added"); + } + if (count <= 0) { + throw new IllegalArgumentException("Cannot add an empty bucket"); + } + int slot = negativeBucketCount + positiveBucketCount; + if (slot >= bucketCounts.length) { + return false; // no more space + } + bucketIndices[slot] = index; + bucketCounts[slot] = count; + if (isPositive) { + if (positiveBucketCount > 0 && bucketIndices[slot - 1] > index) { + throw new IllegalStateException("Buckets must be added in ascending index order!"); + } + positiveBucketCount++; + } else { + if (negativeBucketCount > 0 && bucketIndices[slot - 1] > index) { + throw new IllegalStateException("Buckets must be added in ascending index order!"); + } + negativeBucketCount++; + } + return true; + } + + @Override + public int scale() { + return bucketScale; + } + + @Override + public CopyableBucketIterator negativeBuckets() { + return new BucketArrayIterator(0, negativeBucketCount); + } + + @Override + public OptionalLong maximumBucketIndex() { + 
long maxIndex = Long.MIN_VALUE; + if (negativeBucketCount > 0) { + maxIndex = bucketIndices[negativeBucketCount - 1]; + } + if (positiveBucketCount > 0) { + maxIndex = Math.max(maxIndex, bucketIndices[negativeBucketCount + positiveBucketCount - 1]); + } + return maxIndex == Long.MIN_VALUE ? OptionalLong.empty() : OptionalLong.of(maxIndex); + } + + @Override + public CopyableBucketIterator positiveBuckets() { + return new BucketArrayIterator(negativeBucketCount, negativeBucketCount + positiveBucketCount); + } + + private class BucketArrayIterator implements CopyableBucketIterator { + + int current; + final int limit; + + private BucketArrayIterator(int start, int limit) { + this.current = start; + this.limit = limit; + } + + @Override + public boolean hasNext() { + return current < limit; + } + + @Override + public long peekCount() { + ensureEndNotReached(); + return bucketCounts[current]; + } + + @Override + public long peekIndex() { + ensureEndNotReached(); + return bucketIndices[current]; + } + + @Override + public void advance() { + ensureEndNotReached(); + current++; + } + + @Override + public int scale() { + return FixedCapacityExponentialHistogram.this.scale(); + } + + @Override + public CopyableBucketIterator copy() { + return new BucketArrayIterator(current, limit); + } + + private void ensureEndNotReached() { + if (hasNext() == false) { + throw new IllegalStateException("No more buckets"); + } + } + } + +} diff --git a/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/FixedSizeExponentialHistogram.java b/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/FixedSizeExponentialHistogram.java deleted file mode 100644 index 6d182666ccd2f..0000000000000 --- a/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/FixedSizeExponentialHistogram.java +++ /dev/null @@ -1,159 +0,0 @@ -/* - * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. 
under one - * or more contributor license agreements. Licensed under the "Elastic License - * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side - * Public License v 1"; you may not use this file except in compliance with, at - * your election, the "Elastic License 2.0", the "GNU Affero General Public - * License v3.0 only", or the "Server Side Public License, v 1". - */ - -package org.elasticsearch.exponentialhistogram; - -public final class FixedSizeExponentialHistogram implements ExponentialHistogramBuilder, ExponentialHistogram { - - private final long[] bucketIndices; - private final long[] bucketCounts; - private int negativeBucketCount; - private int positiveBucketCount; - private int bucketScale; - - private ZeroBucket zeroBucket; - - public FixedSizeExponentialHistogram(int bucketCount) { - bucketCount = Math.max(bucketCount, 2); // we need at least two buckets, one for positive values, one for negative - bucketIndices = new long[bucketCount]; - bucketCounts = new long[bucketCount]; - reset(); - } - - void reset() { - setZeroBucket(ZeroBucket.minimalEmpty()); - resetBuckets(MAX_SCALE); - } - - @Override - public void resetBuckets(int newScale) { - if (newScale > MAX_SCALE) { - throw new IllegalArgumentException("scale must be <= MAX_SCALE ("+MAX_SCALE+")"); - } - negativeBucketCount = 0; - positiveBucketCount = 0; - bucketScale = newScale; - } - - @Override - public ZeroBucket zeroBucket() { - return zeroBucket; - } - - @Override - public void setZeroBucket(ZeroBucket zeroBucket) { - this.zeroBucket = zeroBucket; - } - - @Override - public boolean tryAddBucket(long index, long count, boolean isPositive) { - if (index < MIN_INDEX || index > MAX_INDEX) { - throw new IllegalArgumentException("index must be in range ["+MIN_INDEX+".."+MAX_INDEX+"]"); - } - if (isPositive == false && positiveBucketCount > 0) { - throw new IllegalArgumentException("Cannot add negative buckets after a positive bucket was added"); - } - int slot = 
negativeBucketCount + positiveBucketCount; - if (slot >= bucketCounts.length) { - return false; // no more space - } - bucketIndices[slot] = index; - bucketCounts[slot] = count; - if (isPositive) { - if (positiveBucketCount > 0 && bucketIndices[slot - 1] > index) { - throw new IllegalStateException("Buckets must be added in ascending index order!"); - } - positiveBucketCount++; - } else { - if (negativeBucketCount > 0 && bucketIndices[slot - 1] > index) { - throw new IllegalStateException("Buckets must be added in ascending index order!"); - } - negativeBucketCount++; - } - return true; - } - - @Override - public int scale() { - return bucketScale; - } - - @Override - public BucketIterator negativeBuckets() { - return new BucketArrayIterator(0, negativeBucketCount); - } - - @Override - public long maximumBucketIndex() { - long maxIndex = Long.MIN_VALUE; - if (negativeBucketCount > 0) { - maxIndex = bucketIndices[negativeBucketCount - 1]; - } - if (positiveBucketCount > 0) { - maxIndex = Math.max(maxIndex, bucketIndices[negativeBucketCount + positiveBucketCount - 1]); - } - return maxIndex; - } - - @Override - public BucketIterator positiveBuckets() { - return new BucketArrayIterator(negativeBucketCount, negativeBucketCount + positiveBucketCount); - } - - private class BucketArrayIterator implements BucketIterator { - - int current; - final int limit; - - private BucketArrayIterator(int start, int limit) { - this.current = start; - this.limit = limit; - } - - @Override - public boolean hasNext() { - return current < limit; - } - - @Override - public long peekCount() { - if (hasNext() == false) { - throw new IllegalStateException("No more buckets"); - } - return bucketCounts[current]; - } - - @Override - public long peekIndex() { - if (hasNext() == false) { - throw new IllegalStateException("No more buckets"); - } - return bucketIndices[current]; - } - - @Override - public void advance() { - if (hasNext() == false) { - throw new IllegalStateException("No more 
buckets"); - } - current++; - } - - @Override - public int scale() { - return FixedSizeExponentialHistogram.this.scale(); - } - - @Override - public BucketIterator copy() { - return new BucketArrayIterator(current, limit); - } - } - -} diff --git a/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/MergingBucketIterator.java b/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/MergingBucketIterator.java index 264a38e36883e..b3787e7fc948d 100644 --- a/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/MergingBucketIterator.java +++ b/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/MergingBucketIterator.java @@ -9,7 +9,7 @@ package org.elasticsearch.exponentialhistogram; -public final class MergingBucketIterator implements ExponentialHistogram.BucketIterator { +final class MergingBucketIterator implements ExponentialHistogram.BucketIterator { private final ExponentialHistogram.BucketIterator itA; private final ExponentialHistogram.BucketIterator itB; @@ -18,7 +18,7 @@ public final class MergingBucketIterator implements ExponentialHistogram.BucketI private long currentIndex; private long currentCount; - public MergingBucketIterator(ExponentialHistogram.BucketIterator itA, ExponentialHistogram.BucketIterator itB, int targetScale) { + MergingBucketIterator(ExponentialHistogram.BucketIterator itA, ExponentialHistogram.BucketIterator itB, int targetScale) { this.itA = new ScaleAdjustingBucketIterator(itA, targetScale); this.itB = new ScaleAdjustingBucketIterator(itB, targetScale); endReached = false; diff --git a/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ScaleAdjustingBucketIterator.java b/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ScaleAdjustingBucketIterator.java index 90f7187d68402..f85de26758f77 100644 --- 
a/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ScaleAdjustingBucketIterator.java +++ b/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ScaleAdjustingBucketIterator.java @@ -9,9 +9,9 @@ package org.elasticsearch.exponentialhistogram; -import static org.elasticsearch.exponentialhistogram.ExponentialHistogramUtils.adjustScale; +import static org.elasticsearch.exponentialhistogram.ExponentialScaleUtils.adjustScale; -public final class ScaleAdjustingBucketIterator implements ExponentialHistogram.BucketIterator { +final class ScaleAdjustingBucketIterator implements ExponentialHistogram.BucketIterator { private final ExponentialHistogram.BucketIterator delegate; private final int scaleAdjustment; @@ -20,7 +20,7 @@ public final class ScaleAdjustingBucketIterator implements ExponentialHistogram. private long currentCount; boolean hasNextValue; - public ScaleAdjustingBucketIterator(ExponentialHistogram.BucketIterator delegate, int targetScale) { + ScaleAdjustingBucketIterator(ExponentialHistogram.BucketIterator delegate, int targetScale) { this.delegate = delegate; scaleAdjustment = targetScale - delegate.scale(); hasNextValue = true; @@ -51,10 +51,10 @@ public void advance() { if (hasNextValue == false) { return; } - currentIndex = adjustScale(delegate.peekIndex(), scaleAdjustment); + currentIndex = adjustScale(delegate.peekIndex(), delegate.scale(), scaleAdjustment); currentCount = delegate.peekCount(); delegate.advance(); - while (delegate.hasNext() && adjustScale(delegate.peekIndex(), scaleAdjustment) == currentIndex) { + while (delegate.hasNext() && adjustScale(delegate.peekIndex(), delegate.scale(), scaleAdjustment) == currentIndex) { currentCount += delegate.peekCount(); delegate.advance(); } diff --git a/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ZeroBucket.java b/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ZeroBucket.java index 
f1b16b5a21dbe..803dc92057a87 100644 --- a/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ZeroBucket.java +++ b/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ZeroBucket.java @@ -10,9 +10,9 @@ package org.elasticsearch.exponentialhistogram; import static org.elasticsearch.exponentialhistogram.ExponentialHistogram.MIN_INDEX; -import static org.elasticsearch.exponentialhistogram.ExponentialHistogramUtils.compareLowerBoundaries; -import static org.elasticsearch.exponentialhistogram.ExponentialHistogramUtils.computeIndex; -import static org.elasticsearch.exponentialhistogram.ExponentialHistogramUtils.getLowerBucketBoundary; +import static org.elasticsearch.exponentialhistogram.ExponentialScaleUtils.compareLowerBoundaries; +import static org.elasticsearch.exponentialhistogram.ExponentialScaleUtils.computeIndex; +import static org.elasticsearch.exponentialhistogram.ExponentialScaleUtils.getLowerBucketBoundary; import static org.elasticsearch.exponentialhistogram.ExponentialHistogram.MAX_SCALE; public record ZeroBucket(long index, int scale, long count) { diff --git a/libs/exponential-histogram/src/test/java/org/elasticsearch/exponentialhistogram/DownscaleStatsTests.java b/libs/exponential-histogram/src/test/java/org/elasticsearch/exponentialhistogram/DownscaleStatsTests.java index 043afc26aa1fb..df6a1773202c8 100644 --- a/libs/exponential-histogram/src/test/java/org/elasticsearch/exponentialhistogram/DownscaleStatsTests.java +++ b/libs/exponential-histogram/src/test/java/org/elasticsearch/exponentialhistogram/DownscaleStatsTests.java @@ -37,7 +37,7 @@ public void testRandom() { Random rnd = new Random(42); for (int i = 0; i < 100; i++) { - List values = IntStream.range(0, 10_000).mapToObj(j -> rnd.nextLong()).distinct().toList(); + List values = IntStream.range(0, 1000).mapToObj(j -> rnd.nextLong()).distinct().toList(); verifyFor(values); } } diff --git 
a/libs/exponential-histogram/src/test/java/org/elasticsearch/exponentialhistogram/ExponentialHistogramGeneratorTests.java b/libs/exponential-histogram/src/test/java/org/elasticsearch/exponentialhistogram/ExponentialHistogramGeneratorTests.java index 52f17a9f42645..606acda310c55 100644 --- a/libs/exponential-histogram/src/test/java/org/elasticsearch/exponentialhistogram/ExponentialHistogramGeneratorTests.java +++ b/libs/exponential-histogram/src/test/java/org/elasticsearch/exponentialhistogram/ExponentialHistogramGeneratorTests.java @@ -23,8 +23,8 @@ public void testVeryLargeValue() { long index = histo.positiveBuckets().peekIndex(); int scale = histo.scale(); - double lowerBound = ExponentialHistogramUtils.getLowerBucketBoundary(index, scale); - double upperBound = ExponentialHistogramUtils.getUpperBucketBoundary(index, scale); + double lowerBound = ExponentialScaleUtils.getLowerBucketBoundary(index, scale); + double upperBound = ExponentialScaleUtils.getUpperBucketBoundary(index, scale); assertThat("Lower bucket boundary should be smaller than value", lowerBound, lessThanOrEqualTo(value)); assertThat("Upper bucket boundary should be greater than value", upperBound, greaterThanOrEqualTo(value)); diff --git a/libs/exponential-histogram/src/test/java/org/elasticsearch/exponentialhistogram/ExponentialHistogramMergerTests.java b/libs/exponential-histogram/src/test/java/org/elasticsearch/exponentialhistogram/ExponentialHistogramMergerTests.java index b2385003674ad..e4feb10e52329 100644 --- a/libs/exponential-histogram/src/test/java/org/elasticsearch/exponentialhistogram/ExponentialHistogramMergerTests.java +++ b/libs/exponential-histogram/src/test/java/org/elasticsearch/exponentialhistogram/ExponentialHistogramMergerTests.java @@ -19,17 +19,16 @@ import java.util.stream.Collectors; import java.util.stream.IntStream; -import static org.elasticsearch.exponentialhistogram.FixedSizeExponentialHistogramTests.printMidpoints; import static org.hamcrest.Matchers.closeTo; import 
static org.hamcrest.Matchers.equalTo; public class ExponentialHistogramMergerTests extends ESTestCase { public void testZeroThresholdCollapsesOverlappingBuckets() { - FixedSizeExponentialHistogram first = new FixedSizeExponentialHistogram(100); + FixedCapacityExponentialHistogram first = new FixedCapacityExponentialHistogram(100); first.setZeroBucket(new ZeroBucket(2.0001, 10)); - FixedSizeExponentialHistogram second = new FixedSizeExponentialHistogram(100); + FixedCapacityExponentialHistogram second = new FixedCapacityExponentialHistogram(100); first.resetBuckets(0); // scale 0 means base 2 first.tryAddBucket(0, 1, false); // bucket (-2, 1] first.tryAddBucket(1, 1, false); // bucket (-4, 2] @@ -59,7 +58,7 @@ public void testZeroThresholdCollapsesOverlappingBuckets() { assertThat(posBuckets.hasNext(), equalTo(false)); // ensure buckets of the accumulated histogram are collapsed too if needed - FixedSizeExponentialHistogram third = new FixedSizeExponentialHistogram(100); + FixedCapacityExponentialHistogram third = new FixedCapacityExponentialHistogram(100); third.setZeroBucket(new ZeroBucket(45.0, 1)); mergeResult = mergeWithMinimumScale(100, 0, mergeResult, third); @@ -70,12 +69,12 @@ public void testZeroThresholdCollapsesOverlappingBuckets() { } public void testEmptyZeroBucketIgnored() { - FixedSizeExponentialHistogram first = new FixedSizeExponentialHistogram(100); + FixedCapacityExponentialHistogram first = new FixedCapacityExponentialHistogram(100); first.setZeroBucket(new ZeroBucket(2.0, 10)); first.resetBuckets(0); // scale 0 means base 2 first.tryAddBucket(2, 42L, true); // bucket (4, 8] - FixedSizeExponentialHistogram second = new FixedSizeExponentialHistogram(100); + FixedCapacityExponentialHistogram second = new FixedCapacityExponentialHistogram(100); second.setZeroBucket(new ZeroBucket(100.0, 0)); ExponentialHistogram mergeResult = mergeWithMinimumScale(100, 0, first, second); @@ -112,8 +111,6 @@ public void testMergeOrderIndependence() { 
assertBucketsEqual(shuffled.negativeBuckets(), reference.negativeBuckets()); assertBucketsEqual(shuffled.positiveBuckets(), reference.positiveBuckets()); } - printMidpoints(reference); - } private void assertBucketsEqual(ExponentialHistogram.BucketIterator itA, ExponentialHistogram.BucketIterator itB) { diff --git a/libs/exponential-histogram/src/test/java/org/elasticsearch/exponentialhistogram/ExponentialHistogramUtilsTests.java b/libs/exponential-histogram/src/test/java/org/elasticsearch/exponentialhistogram/ExponentialHistogramUtilsTests.java deleted file mode 100644 index 5b1d8154c80cf..0000000000000 --- a/libs/exponential-histogram/src/test/java/org/elasticsearch/exponentialhistogram/ExponentialHistogramUtilsTests.java +++ /dev/null @@ -1,107 +0,0 @@ -/* - * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one - * or more contributor license agreements. Licensed under the "Elastic License - * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side - * Public License v 1"; you may not use this file except in compliance with, at - * your election, the "Elastic License 2.0", the "GNU Affero General Public - * License v3.0 only", or the "Server Side Public License, v 1". 
- */ - -package org.elasticsearch.exponentialhistogram; - -import org.elasticsearch.test.ESTestCase; - -import java.util.Random; - -import static org.elasticsearch.exponentialhistogram.ExponentialHistogram.MAX_INDEX; -import static org.elasticsearch.exponentialhistogram.ExponentialHistogram.MAX_SCALE; -import static org.elasticsearch.exponentialhistogram.ExponentialHistogram.MIN_INDEX; -import static org.elasticsearch.exponentialhistogram.ExponentialHistogramUtils.adjustScale; -import static org.elasticsearch.exponentialhistogram.ExponentialHistogramUtils.compareLowerBoundaries; -import static org.elasticsearch.exponentialhistogram.ExponentialHistogramUtils.getLowerBucketBoundary; -import static org.elasticsearch.exponentialhistogram.ExponentialHistogramUtils.getMaximumScaleIncrease; -import static org.elasticsearch.exponentialhistogram.ExponentialHistogramUtils.getUpperBucketBoundary; -import static org.hamcrest.Matchers.equalTo; -import static org.hamcrest.Matchers.greaterThan; -import static org.hamcrest.Matchers.lessThan; -import static org.hamcrest.Matchers.lessThanOrEqualTo; - -public class ExponentialHistogramUtilsTests extends ESTestCase { - - public void testMaxValue() { - assertThat(getMaximumScaleIncrease(MAX_INDEX), equalTo(0)); - assertThat(getMaximumScaleIncrease(MAX_INDEX >> 1), equalTo(1)); - - assertThrows(ArithmeticException.class, () -> Math.multiplyExact(MAX_INDEX, 4)); - } - - public void testMinValue() { - assertThat(getMaximumScaleIncrease(MIN_INDEX), equalTo(0)); - assertThat(getMaximumScaleIncrease(MIN_INDEX >> 1), equalTo(1)); - } - - public void testRandomIndicesScaleAdjustement() { - Random rnd = new Random(42); - - for (int i = 0; i < 100_000; i++) { - long index = rnd.nextLong() % MAX_INDEX; - int maxScale = getMaximumScaleIncrease(index); - - assertThat(adjustScale(adjustScale(index, maxScale), -maxScale), equalTo(index)); - if (index >0) { - assertThat(adjustScale(index, maxScale) *2, greaterThan(MAX_INDEX)); - } else { - 
assertThat(adjustScale(index, maxScale) *2, lessThan(MIN_INDEX)); - - } - } - - } - - public void testRandomBucketBoundaryComparison() { - Random rnd = new Random(42); - - for (int i = 0; i < 100_000; i++) { - long indexA = rnd.nextLong() % MAX_INDEX; - long indexB = rnd.nextLong() % MAX_INDEX; - int scaleA = rnd.nextInt() % MAX_SCALE; - int scaleB = rnd.nextInt() % MAX_SCALE; - - double lowerBoundA = getLowerBucketBoundary(indexA, scaleA); - while (Double.isInfinite(lowerBoundA)) { - indexA = indexA >> 1; - lowerBoundA = getLowerBucketBoundary(indexA, scaleA); - } - double lowerBoundB = getLowerBucketBoundary(indexB, scaleB); - while (Double.isInfinite(lowerBoundB)) { - indexB = indexB >> 1; - lowerBoundB = getLowerBucketBoundary(indexB, scaleB); - } - - if (lowerBoundA != lowerBoundB) { - assertThat(Double.compare(lowerBoundA, lowerBoundB), equalTo(compareLowerBoundaries(indexA, scaleA, indexB, scaleB))); - } - } - - } - - public void testScalingUpToMidpoint() { - long midpointIndex = adjustScale(0, 64); - double lowerBoundary = getLowerBucketBoundary(midpointIndex, 64); - double upperBoundary = getUpperBucketBoundary(midpointIndex, 64); - - // due to limited double-float precision the results are actually exact - assertThat(lowerBoundary, equalTo(4.0 / 3.0)); - assertThat(upperBoundary, equalTo(4.0 / 3.0)); - } - - public void testSaneBucketBoundaries() { - assertThat(getLowerBucketBoundary(0, 42), equalTo(1.0)); - assertThat(getLowerBucketBoundary(1, 0), equalTo(2.0)); - assertThat(getLowerBucketBoundary(1, -1), equalTo(4.0)); - - double limit1 = getLowerBucketBoundary(MIN_INDEX, MAX_SCALE); - double limit2 = getLowerBucketBoundary(MIN_INDEX, MAX_SCALE); - assertThat(limit1, lessThanOrEqualTo(limit2)); - } -} diff --git a/libs/exponential-histogram/src/test/java/org/elasticsearch/exponentialhistogram/ExponentialScaleUtilsTests.java b/libs/exponential-histogram/src/test/java/org/elasticsearch/exponentialhistogram/ExponentialScaleUtilsTests.java new file mode 
100644 index 0000000000000..ce47c73acfef8 --- /dev/null +++ b/libs/exponential-histogram/src/test/java/org/elasticsearch/exponentialhistogram/ExponentialScaleUtilsTests.java @@ -0,0 +1,198 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the "Elastic License + * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side + * Public License v 1"; you may not use this file except in compliance with, at + * your election, the "Elastic License 2.0", the "GNU Affero General Public + * License v3.0 only", or the "Server Side Public License, v 1". + */ + +package org.elasticsearch.exponentialhistogram; + +import ch.obermuhlner.math.big.BigDecimalMath; + +import org.elasticsearch.test.ESTestCase; + +import java.math.BigDecimal; +import java.math.MathContext; +import java.util.Random; + +import static org.elasticsearch.exponentialhistogram.ExponentialHistogram.MAX_INDEX; +import static org.elasticsearch.exponentialhistogram.ExponentialHistogram.MAX_INDEX_BITS; +import static org.elasticsearch.exponentialhistogram.ExponentialHistogram.MAX_SCALE; +import static org.elasticsearch.exponentialhistogram.ExponentialHistogram.MIN_INDEX; +import static org.elasticsearch.exponentialhistogram.ExponentialHistogram.MIN_SCALE; +import static org.elasticsearch.exponentialhistogram.ExponentialScaleUtils.SCALE_UP_CONSTANT_TABLE; +import static org.elasticsearch.exponentialhistogram.ExponentialScaleUtils.adjustScale; +import static org.elasticsearch.exponentialhistogram.ExponentialScaleUtils.compareLowerBoundaries; +import static org.elasticsearch.exponentialhistogram.ExponentialScaleUtils.computeIndex; +import static org.elasticsearch.exponentialhistogram.ExponentialScaleUtils.getLowerBucketBoundary; +import static org.elasticsearch.exponentialhistogram.ExponentialScaleUtils.getMaximumScaleIncrease; +import static 
org.elasticsearch.exponentialhistogram.ExponentialScaleUtils.getPointOfLeastRelativeError; +import static org.elasticsearch.exponentialhistogram.ExponentialScaleUtils.getUpperBucketBoundary; +import static org.hamcrest.Matchers.closeTo; +import static org.hamcrest.Matchers.equalTo; +import static org.hamcrest.Matchers.greaterThan; +import static org.hamcrest.Matchers.greaterThanOrEqualTo; +import static org.hamcrest.Matchers.lessThan; +import static org.hamcrest.Matchers.lessThanOrEqualTo; + +public class ExponentialScaleUtilsTests extends ESTestCase { + + public void testMaxIndex() { + assertThat(getMaximumScaleIncrease(MAX_INDEX), equalTo(0)); + assertThat(getMaximumScaleIncrease(MAX_INDEX >> 1), equalTo(1)); + assertThrows(ArithmeticException.class, () -> Math.multiplyExact(MAX_INDEX, 4)); + } + + public void testMinIndex() { + assertThat(getMaximumScaleIncrease(MIN_INDEX), equalTo(0)); + assertThat(getMaximumScaleIncrease(MIN_INDEX >> 1), equalTo(1)); + assertThrows(ArithmeticException.class, () -> Math.multiplyExact(MIN_INDEX, 4)); + } + + public void testExtremeValueIndexing() { + double leeway = Math.pow(10.0, 20); + + for (double testValue : new double[] { Double.MAX_VALUE / leeway, Double.MIN_VALUE * leeway }) { + long idx = computeIndex(testValue, MAX_SCALE); + double lowerBound = getLowerBucketBoundary(idx, MAX_SCALE); + double upperBound = getUpperBucketBoundary(idx, MAX_SCALE); + assertThat(lowerBound, lessThanOrEqualTo(testValue)); + assertThat(upperBound, greaterThanOrEqualTo(testValue)); + assertThat(lowerBound, lessThan(upperBound)); + } + } + + public void testRandomValueIndexing() { + Random rnd = new Random(42); + + for (int i = 0; i < 100_000; i++) { + // generate values in the range 10^-100 to 10^100 + double exponent = rnd.nextDouble() * 200 - 100; + double testValue = Math.pow(10, exponent); + int scale = rnd.nextInt(MIN_SCALE/2, MAX_SCALE/2); + long index = computeIndex(testValue, scale); + + double lowerBound = 
getLowerBucketBoundary(index, scale); + double upperBound = getUpperBucketBoundary(index, scale); + double pointOfLeastError = getPointOfLeastRelativeError(index, scale); + + String baseMsg = " for input value " + testValue + " and scale " + scale; + + assertThat("Expected lower bound to be less than input value", lowerBound, lessThanOrEqualTo(testValue)); + assertThat("Expected upper bound to be greater than input value", upperBound, greaterThanOrEqualTo(testValue)); + assertThat("Expected lower bound to be less than upper bound" + baseMsg, lowerBound, lessThan(upperBound)); + + // only do this check for ranges where we have enough numeric stability + if (lowerBound > Math.pow(10, -250) && upperBound < Math.pow(10, 250)) { + + assertThat( + "Expected point of least error to be greater than lower bound" + baseMsg, + pointOfLeastError, + greaterThan(lowerBound) + ); + assertThat("Expected point of least error to be less than upper bound" + baseMsg, pointOfLeastError, lessThan(upperBound)); + + double errorLower = (pointOfLeastError - lowerBound) / lowerBound; + double errorUpper = (upperBound - pointOfLeastError) / upperBound; + assertThat(errorLower / errorUpper, closeTo(1, 0.1)); + } + + } + } + + public void testRandomIndicesScaleAdjustement() { + Random rnd = new Random(42); + + for (int i = 0; i < 100_000; i++) { + long index = rnd.nextLong(MAX_INDEX); + int currentScale = rnd.nextInt(MIN_SCALE, MAX_SCALE); + int maxAdjustment = getMaximumScaleIncrease(index); + + assertThat( + adjustScale(adjustScale(index, currentScale, maxAdjustment), currentScale + maxAdjustment, -maxAdjustment), + equalTo(index) + ); + if (index > 0) { + assertThat(adjustScale(index, currentScale, maxAdjustment) * 2, greaterThan(MAX_INDEX)); + } else { + assertThat(adjustScale(index, currentScale, maxAdjustment) * 2, lessThan(MIN_INDEX)); + + } + } + + } + + public void testRandomBucketBoundaryComparison() { + Random rnd = new Random(42); + + for (int i = 0; i < 100_000; i++) { + long 
indexA = rnd.nextLong() % MAX_INDEX; + long indexB = rnd.nextLong() % MAX_INDEX; + int scaleA = rnd.nextInt() % MAX_SCALE; + int scaleB = rnd.nextInt() % MAX_SCALE; + + double lowerBoundA = getLowerBucketBoundary(indexA, scaleA); + while (Double.isInfinite(lowerBoundA)) { + indexA = indexA >> 1; + lowerBoundA = getLowerBucketBoundary(indexA, scaleA); + } + double lowerBoundB = getLowerBucketBoundary(indexB, scaleB); + while (Double.isInfinite(lowerBoundB)) { + indexB = indexB >> 1; + lowerBoundB = getLowerBucketBoundary(indexB, scaleB); + } + + if (lowerBoundA != lowerBoundB) { + assertThat(Double.compare(lowerBoundA, lowerBoundB), equalTo(compareLowerBoundaries(indexA, scaleA, indexB, scaleB))); + } + } + } + + public void testUpscalingAccuracy() { + Random rnd = new Random(42); + + // Use slightly adjusted scales to not run into numeric trouble, because we don't use exact maths here + int minScale = MIN_SCALE + 7; + int maxScale = MAX_SCALE - 15; + + for (int i = 0; i < 10_000; i++) { + + int startScale = rnd.nextInt(minScale, maxScale); + int scaleIncrease = rnd.nextInt(1, maxScale-startScale + 1); + + long index = MAX_INDEX >> scaleIncrease >> (int) (rnd.nextDouble() * (MAX_INDEX_BITS - scaleIncrease)); + index = Math.max(1, index); + index = (long) (rnd.nextDouble() * index) * (rnd.nextBoolean() ? 
1 : -1); + + + double midPoint = getPointOfLeastRelativeError(index, startScale); + // limit the numeric range, otherwise we get rounding errors causing the test to fail + while (midPoint > Math.pow(10, 10) || midPoint < Math.pow(10, -10)) { + index /= 2; + midPoint = getPointOfLeastRelativeError(index, startScale); + } + + long scaledUpIndex = adjustScale(index, startScale, scaleIncrease); + long correctIdx = computeIndex(midPoint, startScale + scaleIncrease); + // Due to rounding problems in the tests, we can still be off by one for extreme scales + assertThat(scaledUpIndex, equalTo(correctIdx)); + } + } + + public void testScaleUpTableUpToDate() { + + MathContext mc = new MathContext(200); + BigDecimal one = new BigDecimal(1, mc); + BigDecimal two = new BigDecimal(2, mc); + + for (int scale = MIN_SCALE; scale <= MAX_SCALE; scale++) { + BigDecimal base = BigDecimalMath.pow(two, two.pow(-scale, mc), mc); + BigDecimal factor = one.add(two.pow(scale, mc).multiply(one.subtract(BigDecimalMath.log2(one.add(base), mc)))); + assertThat(SCALE_UP_CONSTANT_TABLE[scale - MIN_SCALE], equalTo(factor.doubleValue())); + + } + } + +} diff --git a/libs/exponential-histogram/src/test/java/org/elasticsearch/exponentialhistogram/FixedSizeExponentialHistogramTests.java b/libs/exponential-histogram/src/test/java/org/elasticsearch/exponentialhistogram/FixedSizeExponentialHistogramTests.java deleted file mode 100644 index 3bb609ef37719..0000000000000 --- a/libs/exponential-histogram/src/test/java/org/elasticsearch/exponentialhistogram/FixedSizeExponentialHistogramTests.java +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one - * or more contributor license agreements. 
Licensed under the "Elastic License - * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side - * Public License v 1"; you may not use this file except in compliance with, at - * your election, the "Elastic License 2.0", the "GNU Affero General Public - * License v3.0 only", or the "Server Side Public License, v 1". - */ - -package org.elasticsearch.exponentialhistogram; - -import org.elasticsearch.test.ESTestCase; - -import java.util.stream.IntStream; - -import static org.hamcrest.Matchers.equalTo; - -public class FixedSizeExponentialHistogramTests extends ESTestCase { - - - public void testDefaultZeroBucketHasZeroThreshold() { - ExponentialHistogram histo = ExponentialHistogramGenerator.createFor(); - assertThat(histo.zeroBucket().zeroThreshold(), equalTo(0.0)); - } - - public void testPrintBuckets() { - ExponentialHistogram first = ExponentialHistogramGenerator.createFor(0.01234, 42, 56789); - ExponentialHistogram second = ExponentialHistogramGenerator.createFor(38, 50, 250, 257, 10001.1234); - - ExponentialHistogram result = ExponentialHistogramMerger.merge(7, first, second); - printMidpoints(result); - } - - public void testPrintBucketsLinearScale() { - - ExponentialHistogram result = ExponentialHistogramGenerator.createFor( - 1000, - IntStream.range(-1_000_000, 2_000_000).mapToDouble(Double::valueOf) - ); - - double smallPerc = ExponentialHistogramQuantile.getQuantile(result, 0.00001); - double highPerc = ExponentialHistogramQuantile.getQuantile(result, 0.9999); - double median = ExponentialHistogramQuantile.getQuantile(result, 0.5); - - printMidpoints(result); - } - - public static void printMidpoints(ExponentialHistogram histo) { - StringBuilder sb = new StringBuilder("{ base : "); - sb.append(ExponentialHistogramUtils.getLowerBucketBoundary(1, histo.scale())).append(", "); - ExponentialHistogram.BucketIterator neg = histo.negativeBuckets(); - while (neg.hasNext()) { - long idx = neg.peekIndex(); - long count = neg.peekCount(); - 
double center = -ExponentialHistogramUtils.getPointOfLeastRelativeError(idx, neg.scale()); - sb.append(center).append(":").append(count).append(", "); - neg.advance(); - } - sb.append("0.0 : ").append(histo.zeroBucket().count()); - ExponentialHistogram.BucketIterator pos = histo.positiveBuckets(); - while (pos.hasNext()) { - long idx = pos.peekIndex(); - long count = pos.peekCount(); - double center = ExponentialHistogramUtils.getPointOfLeastRelativeError(idx, pos.scale()); - sb.append(", ").append(center).append(":").append(count); - pos.advance(); - } - sb.append('}'); - System.out.println(sb); - - } -} diff --git a/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ExponentialHistogramBuilder.java b/libs/exponential-histogram/src/test/java/org/elasticsearch/exponentialhistogram/ZeroBucketTests.java similarity index 65% rename from libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ExponentialHistogramBuilder.java rename to libs/exponential-histogram/src/test/java/org/elasticsearch/exponentialhistogram/ZeroBucketTests.java index af2dbcc86bfdc..6fc4a1b6671a8 100644 --- a/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ExponentialHistogramBuilder.java +++ b/libs/exponential-histogram/src/test/java/org/elasticsearch/exponentialhistogram/ZeroBucketTests.java @@ -9,11 +9,13 @@ package org.elasticsearch.exponentialhistogram; -public interface ExponentialHistogramBuilder { +import org.elasticsearch.test.ESTestCase; - void setZeroBucket(ZeroBucket zeroBucket); +import static org.hamcrest.Matchers.equalTo; - boolean tryAddBucket(long index, long count, boolean isPositive); +public class ZeroBucketTests extends ESTestCase { - void resetBuckets(int newScale); + public void testMinimalBucketHasZeroThreshold() { + assertThat(ZeroBucket.minimalWithCount(42).zeroThreshold(), equalTo(0.0)); + } } From 454a9cccda03337d4361650a136592e21b910445 Mon Sep 17 00:00:00 2001 From: Jonas Kunz Date: 
Mon, 14 Jul 2025 16:29:09 +0200 Subject: [PATCH 16/32] Remove dead code --- .../ExponentialHistogram.java | 4 +--- .../ExponentialHistogramMerger.java | 18 ++++-------------- .../MergingBucketIterator.java | 5 ----- .../ScaleAdjustingBucketIterator.java | 5 ----- 4 files changed, 5 insertions(+), 27 deletions(-) diff --git a/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ExponentialHistogram.java b/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ExponentialHistogram.java index cd77873e2231d..2f13c28b03776 100644 --- a/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ExponentialHistogram.java +++ b/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ExponentialHistogram.java @@ -40,7 +40,7 @@ public interface ExponentialHistogram { // One option would be to use "Quadruple": https://github.com/m-vokhm/Quadruple int MAX_SCALE = 38; - // Add this scale all double values already fall into a single bucket + // At this scale all double values already fall into a single bucket int MIN_SCALE = -11; // Only use 62 bit at max to allow to compute the difference between the smallest and largest index without causing overflow @@ -122,8 +122,6 @@ interface BucketIterator { * @return the scale, which is guaranteed to be constant over the lifetime of this iterator. 
*/ int scale(); - - BucketIterator copy(); } /** diff --git a/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ExponentialHistogramMerger.java b/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ExponentialHistogramMerger.java index 929cfe0e18e10..fa2040e493381 100644 --- a/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ExponentialHistogramMerger.java +++ b/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ExponentialHistogramMerger.java @@ -62,26 +62,16 @@ public ExponentialHistogram get() { return result; } - public static ExponentialHistogram merge(int bucketCount, ExponentialHistogram... histograms) { - return merge(bucketCount, Arrays.stream(histograms)); - } - - public static ExponentialHistogram merge(int bucketCount, Stream histograms) { - ExponentialHistogramMerger merger = new ExponentialHistogramMerger(bucketCount); - histograms.forEach(merger::add); - return merger.get(); - } - // TODO: this algorithm is very efficient if b has roughly as many buckets as a // However, if b is much smaller we still have to iterate over all buckets of a which is very wasteful // This can be optimized by buffering multiple histograms to accumulate first, then in O(log(b)) turn them into a single, merged histogram // (b is the number of buffered buckets) private void merge(FixedCapacityExponentialHistogram output, ExponentialHistogram a, ExponentialHistogram b) { - ExponentialHistogram.BucketIterator posBucketsA = a.positiveBuckets(); - ExponentialHistogram.BucketIterator negBucketsA = a.negativeBuckets(); - ExponentialHistogram.BucketIterator posBucketsB = b.positiveBuckets(); - ExponentialHistogram.BucketIterator negBucketsB = b.negativeBuckets(); + ExponentialHistogram.CopyableBucketIterator posBucketsA = a.positiveBuckets(); + ExponentialHistogram.CopyableBucketIterator negBucketsA = a.negativeBuckets(); + 
ExponentialHistogram.CopyableBucketIterator posBucketsB = b.positiveBuckets(); + ExponentialHistogram.CopyableBucketIterator negBucketsB = b.negativeBuckets(); ZeroBucket zeroBucket = a.zeroBucket().merge(b.zeroBucket()); zeroBucket = zeroBucket.collapseOverlappingBuckets(posBucketsA, negBucketsA, posBucketsB, negBucketsB); diff --git a/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/MergingBucketIterator.java b/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/MergingBucketIterator.java index b3787e7fc948d..26059fcc2890b 100644 --- a/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/MergingBucketIterator.java +++ b/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/MergingBucketIterator.java @@ -79,11 +79,6 @@ public int scale() { return itA.scale(); } - @Override - public ExponentialHistogram.BucketIterator copy() { - throw new UnsupportedOperationException(); - } - private void assertEndNotReached() { if (endReached) { throw new IllegalStateException("No more buckets"); diff --git a/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ScaleAdjustingBucketIterator.java b/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ScaleAdjustingBucketIterator.java index f85de26758f77..a8f59a7dd1a93 100644 --- a/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ScaleAdjustingBucketIterator.java +++ b/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ScaleAdjustingBucketIterator.java @@ -70,9 +70,4 @@ private void assertEndNotReached() { public int scale() { return delegate.scale() + scaleAdjustment; } - - @Override - public ExponentialHistogram.BucketIterator copy() { - throw new UnsupportedOperationException(); - } } From 486a8bdd568517581cd2f487c09014508c0cf746 Mon Sep 17 00:00:00 2001 From: Jonas Kunz Date: Mon, 14 Jul 2025 16:33:31 +0200 
Subject: [PATCH 17/32] A bit more javadoc --- .../exponentialhistogram/MergingBucketIterator.java | 4 ++++ .../exponentialhistogram/ScaleAdjustingBucketIterator.java | 5 +++++ 2 files changed, 9 insertions(+) diff --git a/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/MergingBucketIterator.java b/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/MergingBucketIterator.java index 26059fcc2890b..7e3d2458888d7 100644 --- a/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/MergingBucketIterator.java +++ b/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/MergingBucketIterator.java @@ -9,6 +9,10 @@ package org.elasticsearch.exponentialhistogram; +/** + * Iterates over two sets of buckets in parallel, bringing them to the same scale + * and merging buckets which exist in both. + */ final class MergingBucketIterator implements ExponentialHistogram.BucketIterator { private final ExponentialHistogram.BucketIterator itA; diff --git a/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ScaleAdjustingBucketIterator.java b/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ScaleAdjustingBucketIterator.java index a8f59a7dd1a93..a619fcd70d482 100644 --- a/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ScaleAdjustingBucketIterator.java +++ b/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ScaleAdjustingBucketIterator.java @@ -11,6 +11,11 @@ import static org.elasticsearch.exponentialhistogram.ExponentialScaleUtils.adjustScale; +/** + * Iterates over buckets while also adjusting the scale. + * When scaling down, this can cause multiple buckets to collapse into a single one. + * This iterator ensures that they are properly merged in this case. 
+ */ final class ScaleAdjustingBucketIterator implements ExponentialHistogram.BucketIterator { private final ExponentialHistogram.BucketIterator delegate; From 7c6655b37043c1ac171ec878c0750504da68cd1b Mon Sep 17 00:00:00 2001 From: Jonas Kunz Date: Tue, 15 Jul 2025 11:41:37 +0200 Subject: [PATCH 18/32] AI-assisted javadoc and spotless --- .../ExponentialHistogramGenerationBench.java | 2 +- .../exponentialhistogram/DownscaleStats.java | 46 ++-- .../ExponentialHistogram.java | 90 ++++---- .../ExponentialHistogramGenerator.java | 47 ++-- .../ExponentialHistogramMerger.java | 33 ++- .../ExponentialHistogramQuantile.java | 29 ++- .../ExponentialScaleUtils.java | 205 ++++++++++-------- .../FixedCapacityExponentialHistogram.java | 63 +++--- .../MergingBucketIterator.java | 12 +- .../ScaleAdjustingBucketIterator.java | 13 +- .../exponentialhistogram/ZeroBucket.java | 65 +++++- .../ExponentialHistogramGeneratorTests.java | 2 +- .../ExponentialScaleUtilsTests.java | 5 +- 13 files changed, 376 insertions(+), 236 deletions(-) diff --git a/benchmarks/src/main/java/org/elasticsearch/benchmark/exponentialhistogram/ExponentialHistogramGenerationBench.java b/benchmarks/src/main/java/org/elasticsearch/benchmark/exponentialhistogram/ExponentialHistogramGenerationBench.java index 4fe509e200aae..f009aaf813eb0 100644 --- a/benchmarks/src/main/java/org/elasticsearch/benchmark/exponentialhistogram/ExponentialHistogramGenerationBench.java +++ b/benchmarks/src/main/java/org/elasticsearch/benchmark/exponentialhistogram/ExponentialHistogramGenerationBench.java @@ -43,7 +43,7 @@ @State(Scope.Thread) public class ExponentialHistogramGenerationBench { - @Param({ "100", "500", "1000", "5000" , "10000", "20000"}) + @Param({ "100", "500", "1000", "5000", "10000", "20000" }) int bucketCount; @Param({ "NORMAL", "GAUSSIAN" }) diff --git a/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/DownscaleStats.java 
b/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/DownscaleStats.java index cfd210b419be9..cd30f705efc74 100644 --- a/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/DownscaleStats.java +++ b/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/DownscaleStats.java @@ -12,42 +12,52 @@ import java.util.Arrays; /** - * Data structure for effectively computing by how much the scale of a histogram needs to be reduced to reach a target bucket count. - * This works by looking at each pair of neighboring buckets and checking at which scale reduction they would collapse to a single bucket. + * A data structure for efficiently computing the required scale reduction for a histogram to reach a target number of buckets. + * This works by examining pairs of neighboring buckets and determining at which scale reduction they would merge into a single bucket. */ class DownscaleStats { - // collapsedBucketCount[i] represents the number of additional + // collapsedBucketCount[i] stores the number of additional // collapsed buckets when increasing the scale by (i+1) instead of just by (i) int[] collapsedBucketCount = new int[63]; + /** + * Resets the data structure to its initial state. 
+ */ void reset() { Arrays.fill(collapsedBucketCount, 0); } void add(long previousBucketIndex, long currentBucketIndex) { if (currentBucketIndex <= previousBucketIndex) { - throw new IllegalArgumentException("currentBucketIndex must be bigger than previousBucketIndex"); + throw new IllegalArgumentException("currentBucketIndex must be greater than previousBucketIndex"); } - /* Below is an efficient variant of the following algorithm: - for (int i=0; i<63; i++) { - if (prevIndex>>(i+1) == currIndex>>(i+1)) { - collapsedBucketCount[i]++; - break; - } - } - So we find the smallest scale reduction required to make the two buckets collapse into one - */ + /* + * Below is an efficient variant of the following algorithm: + * for (int i=0; i<63; i++) { + * if (prevIndex>>(i+1) == currIndex>>(i+1)) { + * collapsedBucketCount[i]++; + * break; + * } + * } + * So we find the smallest scale reduction required to make the two buckets collapse into one. + */ long bitXor = previousBucketIndex ^ currentBucketIndex; int numEqualLeadingBits = Long.numberOfLeadingZeros(bitXor); if (numEqualLeadingBits == 0) { - // right-shifting will never make the buckets combine, because one is positive and the other negative + // right-shifting will never make the buckets combine, because one is positive and the other is negative return; } int requiredScaleChange = 64 - numEqualLeadingBits; collapsedBucketCount[requiredScaleChange - 1]++; } + /** + * Returns the number of buckets that will be merged after applying the given scale reduction. + * + * @param reduction the scale reduction factor + * @return the number of buckets that will be merged + */ int getCollapsedBucketCountAfterScaleReduction(int reduction) { int totalCollapsed = 0; for (int i = 0; i < reduction; i++) { @@ -56,6 +66,12 @@ int getCollapsedBucketCountAfterScaleReduction(int reduction) { return totalCollapsed; } + /** + * Returns the required scale reduction to reduce the number of buckets by at least the given amount. 
+ * + * @param desiredCollapsedBucketCount the target number of buckets to collapse + * @return the required scale reduction + */ int getRequiredScaleReductionToReduceBucketCountBy(int desiredCollapsedBucketCount) { if (desiredCollapsedBucketCount == 0) { return 0; @@ -67,6 +83,6 @@ int getRequiredScaleReductionToReduceBucketCountBy(int desiredCollapsedBucketCou return i + 1; } } - throw new IllegalArgumentException("it is not possible to reduce the bucket count by " + desiredCollapsedBucketCount); + throw new IllegalArgumentException("Cannot reduce the bucket count by " + desiredCollapsedBucketCount); } } diff --git a/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ExponentialHistogram.java b/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ExponentialHistogram.java index 2f13c28b03776..78ff1bfa06b42 100644 --- a/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ExponentialHistogram.java +++ b/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ExponentialHistogram.java @@ -12,98 +12,104 @@ import java.util.OptionalLong; /** - * Interface for implementations of exponential histograms adhering to the opentelemetry definition. - * This interface explicitly allows for sparse implementation: It does not offer to directly access buckets by index, instead it - * is only possible to iterate over the buckets.
+ * Interface for implementations of exponential histograms adhering to the + * OpenTelemetry definition. + * This interface supports sparse implementations, allowing iteration over buckets without requiring direct index access.
* The most important properties are: *
    - *
  • The histogram has a scale parameter, which defines the accuracy. The base for the buckets is defined as base = 2^(2^-scale)
  • - *
  • The histogram bucket at index i has the range (base^i, base^(i+1)]
  • - *
  • Negative values are represented by a separate negative range of buckets with the boundaries (-base^(i+1), -base^i]
  • - *
  • histograms are perfectly subsetting: Increasing the scale by one exactly merges each pair of neighbouring buckets
  • - *
  • a special {@link ZeroBucket} is used to handle zero and close to zero values
  • + *
  • The histogram has a scale parameter, which defines the accuracy. + * The {@code base} for the buckets is defined as {@code base = 2^(2^-scale)}
  • + *
  • The histogram bucket at index {@code i} has the range {@code (base^i, base^(i+1)]}
  • + *
  • Negative values are represented by a separate negative range of buckets with the boundaries {@code (-base^(i+1), -base^i]}
  • + *
  • Histograms are perfectly subsetting: increasing the scale by one merges each pair of neighboring buckets
  • + *
  • A special {@link ZeroBucket} is used to handle zero and close-to-zero values
  • *
* *
- * In addition, in all algorithms we make a central assumption about the distribution of samples within each bucket: - * We assume they all lie on the single point of least error relative to the bucket boundaries (see {@link ExponentialScaleUtils#getPointOfLeastRelativeError(long, int)}). + * Additionally, all algorithms assume that samples within a bucket are located at a single point: the point of least relative error + * (see {@link ExponentialScaleUtils#getPointOfLeastRelativeError(long, int)}). */ public interface ExponentialHistogram { - //TODO: support min/max/sum/count storage and merging - //TODO: Add special positive and negative infinity buckets to allow representation of explicit bucket histograms with open boundaries + // TODO: support min/max/sum/count storage and merging + // TODO: Add special positive and negative infinity buckets to allow representation of explicit bucket histograms with open boundaries - // scale of 38 is the largest scale where at the borders we don't run into problems due to floating point precision when computing - // indices for double values - // Theoretically, a MAX_SCALE of 51 would work and would still cover the entire range of double values - // For that to work, we'll have to rework the math of converting from double to indices and back + // A scale of 38 is the largest scale where we don't run into problems at the borders due to floating-point precision when computing + // indices for double values. + // Theoretically, a MAX_SCALE of 51 would work and would still cover the entire range of double values. + // For that to work, the math for converting from double to indices and back would need to be reworked. // One option would be to use "Quadruple": https://github.com/m-vokhm/Quadruple int MAX_SCALE = 38; - // At this scale all double values already fall into a single bucket + // At this scale, all double values fall into a single bucket. 
int MIN_SCALE = -11; - // Only use 62 bit at max to allow to compute the difference between the smallest and largest index without causing overflow - // Also the extra bit gives us room for some tricks for compact storage + // Only use 62 bits at max to allow computing the difference between the smallest and largest index without causing an overflow. + // The extra bit also provides room for compact storage tricks. int MAX_INDEX_BITS = 62; long MAX_INDEX = (1L << MAX_INDEX_BITS) - 1; long MIN_INDEX = -MAX_INDEX; /** - * The scale of the histogram. Higher scales result in higher accuracy, but potentially higher bucket count. + * The scale of the histogram. Higher scales result in higher accuracy but potentially more buckets. * Must be less than or equal to {@link #MAX_SCALE} and greater than or equal to {@link #MIN_SCALE}. + * + * @return the scale of the histogram */ int scale(); /** - * @return the {@link ZeroBucket} representing the number of zero (or close to zero) values and its threshold + * @return the {@link ZeroBucket} representing the number of zero (or close-to-zero) values and its threshold */ ZeroBucket zeroBucket(); /** - * @return a {@link BucketIterator} for the populated, positive buckets of this histogram. {@link BucketIterator#scale()} of the return value must return the same value as {@link #scale()}. + * @return a {@link BucketIterator} for the populated, positive buckets of this histogram. + * The {@link BucketIterator#scale()} of the returned iterator must be the same as {@link #scale()}. */ CopyableBucketIterator positiveBuckets(); /** - * @return a {@link BucketIterator} for the populated, negative buckets of this histogram. {@link BucketIterator#scale()} of the return value must return the same value as {@link #scale()}. + * @return a {@link BucketIterator} for the populated, negative buckets of this histogram. + * The {@link BucketIterator#scale()} of the returned iterator must be the same as {@link #scale()}. 
*/ CopyableBucketIterator negativeBuckets(); /** * Returns the highest populated bucket index, taking both negative and positive buckets into account. - * If there are neither positive nor negative buckets populated, an empty optional is returned. + * + * @return the highest populated bucket index, or an empty optional if no buckets are populated */ OptionalLong maximumBucketIndex(); /** - * Iterator over non-empty buckets of the histogram. Can represent either the positive or negative histogram range. + * An iterator over the non-empty buckets of the histogram for either the positive or negative range. *
    - *
  • The iterator always iterates from the lowest bucket index to the highest
  • - *
  • The iterator never returns duplicate buckets (buckets with the same index)
  • - *
  • The iterator never returns empty buckets ({@link #peekCount() is never zero}
  • + *
  • The iterator always iterates from the lowest bucket index to the highest.
  • + *
  • The iterator never returns duplicate buckets (buckets with the same index).
  • + *
  • The iterator never returns empty buckets ({@link #peekCount()} is never zero).
  • *
*/ interface BucketIterator { /** * Checks if there are any buckets remaining to be visited by this iterator. - * If the end has been reached, it is illegal to call {@link #peekCount()}, {@link #peekIndex()} or {@link #advance()}. + * If the end has been reached, it is illegal to call {@link #peekCount()}, {@link #peekIndex()}, or {@link #advance()}. * - * @return false, if the end has been reached, true otherwise. + * @return {@code true} if the iterator has more elements, {@code false} otherwise */ boolean hasNext(); /** - * The number of items in the bucket this iterator currently points at. Does not advance the iterator by itself and therefore can be called repeatedly to return the same value. - * Must not be called if {@link #hasNext()} returns false. + * The number of items in the bucket at the current iterator position. Does not advance the iterator. + * Must not be called if {@link #hasNext()} returns {@code false}. * * @return the number of items in the bucket, always greater than zero */ long peekCount(); /** - * The index of the bucket this iterator currently points at. Does not advance the iterator by itself and therefore can be called repeatedly to return the same value. - * Must not be called if {@link #hasNext()} returns false. + * The index of the bucket at the current iterator position. Does not advance the iterator. + * Must not be called if {@link #hasNext()} returns {@code false}. * * @return the index of the bucket, guaranteed to be in the range [{@link #MIN_INDEX}, {@link #MAX_INDEX}] */ @@ -111,29 +117,31 @@ interface BucketIterator { /** * Moves the iterator to the next, non-empty bucket. - * If {@link #hasNext()} is true after {@link #advance()}, {@link #peekIndex()} is guaranteed to return a value bigger than prior to the {@link #advance()} call. 
+ * If {@link #hasNext()} is {@code true} after calling {@link #advance()}, {@link #peekIndex()} is guaranteed to return a value + * greater than the value returned prior to the {@link #advance()} call. */ void advance(); /** * Provides the scale that can be used to convert indices returned by {@link #peekIndex()} to the bucket boundaries, - * e.g. via {@link ExponentialScaleUtils#getLowerBucketBoundary(long, int)}. + * e.g., via {@link ExponentialScaleUtils#getLowerBucketBoundary(long, int)}. * - * @return the scale, which is guaranteed to be constant over the lifetime of this iterator. + * @return the scale, which is guaranteed to be constant over the lifetime of this iterator */ int scale(); } /** - * A {@link BucketIterator} which can be copied. + * A {@link BucketIterator} that can be copied. */ interface CopyableBucketIterator extends BucketIterator { /** - * Provides a bucket iterator pointing at the same bucket of the same range of buckets as this iterator. - * Calling {@link #advance()} on the copied iterator does not affect this and vice-versa. + * Creates a copy of this bucket iterator, pointing at the same bucket of the same range of buckets. + * Calling {@link #advance()} on the copied iterator does not affect this instance and vice-versa. 
+ * + * @return a copy of this iterator */ CopyableBucketIterator copy(); } - } diff --git a/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ExponentialHistogramGenerator.java b/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ExponentialHistogramGenerator.java index 578e2267db289..eeb66d6f96920 100644 --- a/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ExponentialHistogramGenerator.java +++ b/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ExponentialHistogramGenerator.java @@ -15,17 +15,17 @@ import static org.elasticsearch.exponentialhistogram.ExponentialScaleUtils.computeIndex; /** - * Class for accumulating raw values into an {@link ExponentialHistogram} with a given maximum bucket count. + * A class for accumulating raw values into an {@link ExponentialHistogram} with a given maximum number of buckets. * - * If the number of values is less than or equal the bucket capacity, the resulting histogram is guaranteed - * to represent the exact raw values with a relative error less than 2^(2^-MAX_SCALE) - 1 + * If the number of values is less than or equal to the bucket capacity, the resulting histogram is guaranteed + * to represent the exact raw values with a relative error less than {@code 2^(2^-MAX_SCALE) - 1}. */ public class ExponentialHistogramGenerator { - // Merging individual values into a histogram would way to slow with our sparse, array-backed histogram representation - // Therefore for a bucket capacity of c, we first buffer c raw values to be inserted - // we then turn those into an "exact" histogram, which in turn we merge with our actual result accumulator - // This yields an amortized runtime of O( log(c) ) + // Merging individual values into a histogram would be way too slow with our sparse, array-backed histogram representation. + // Therefore, for a bucket capacity of c, we first buffer c raw values to be inserted. 
+ // We then turn those into an "exact" histogram, which in turn we merge with our actual result accumulator. + // This yields an amortized runtime of O(log(c)). private final double[] rawValueBuffer; int valueCount; @@ -34,6 +34,11 @@ public class ExponentialHistogramGenerator { private boolean isFinished = false; + /** + * Creates a new instance with the specified maximum number of buckets. + * + * @param maxBucketCount the maximum number of buckets for the generated histogram + */ public ExponentialHistogramGenerator(int maxBucketCount) { rawValueBuffer = new double[maxBucketCount]; valueCount = 0; @@ -42,8 +47,10 @@ public ExponentialHistogramGenerator(int maxBucketCount) { } /** - * Add the given value to the histogram. - * Must not be calles after {@link #get()} has been called. + * Adds the given value to the histogram. + * Must not be called after {@link #get()} has been called. + * + * @param value the value to add */ public void add(double value) { if (isFinished) { @@ -57,7 +64,9 @@ public void add(double value) { } /** - * @return the histogram representing the distribution of all accumulated values. + * Returns the histogram representing the distribution of all accumulated values. + * + * @return the histogram representing the distribution of all accumulated values */ public ExponentialHistogram get() { isFinished = true; @@ -66,17 +75,25 @@ public ExponentialHistogram get() { } /** - * Create a histogram representing the distribution of the given values. + * Creates a histogram representing the distribution of the given values. * The histogram will have a bucket count of at most the length of the provided array - * and will have a relative error less than 2^(2^-MAX_SCALE) - 1. + * and will have a relative error less than {@code 2^(2^-MAX_SCALE) - 1}. + * + * @param values the values to be added to the histogram + * @return a new {@link ExponentialHistogram} */ public static ExponentialHistogram createFor(double... 
values) { return createFor(values.length, Arrays.stream(values)); } + /** - * Create a histogram representing the distribution of the given values with at most the given number of buckets. - * If the given bucketCount is greater or equal to the number of values, the resulting histogram will have a - * relative error of less than 2^(2^-MAX_SCALE) - 1. + * Creates a histogram representing the distribution of the given values with at most the given number of buckets. + * If the given bucketCount is greater than or equal to the number of values, the resulting histogram will have a + * relative error of less than {@code 2^(2^-MAX_SCALE) - 1}. + * + * @param bucketCount the maximum number of buckets + * @param values a stream of values to be added to the histogram + * @return a new {@link ExponentialHistogram} */ public static ExponentialHistogram createFor(int bucketCount, DoubleStream values) { ExponentialHistogramGenerator generator = new ExponentialHistogramGenerator(bucketCount); diff --git a/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ExponentialHistogramMerger.java b/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ExponentialHistogramMerger.java index fa2040e493381..352f5ae51494d 100644 --- a/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ExponentialHistogramMerger.java +++ b/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ExponentialHistogramMerger.java @@ -9,9 +9,7 @@ package org.elasticsearch.exponentialhistogram; -import java.util.Arrays; import java.util.OptionalLong; -import java.util.stream.Stream; import static org.elasticsearch.exponentialhistogram.ExponentialScaleUtils.getMaximumScaleIncrease; @@ -29,6 +27,8 @@ public class ExponentialHistogramMerger { private boolean isFinished; /** + * Creates a new instance with the specified bucket limit. 
+ * * @param bucketLimit the maximum number of buckets the result histogram is allowed to have */ public ExponentialHistogramMerger(int bucketLimit) { @@ -44,6 +44,12 @@ public ExponentialHistogramMerger(int bucketLimit) { buffer.resetBuckets(minScale); } + /** + * Merges the given histogram into the current result. + * Must not be called after {@link #get()} has been called. + * + * @param toAdd the histogram to merge + */ public void add(ExponentialHistogram toAdd) { if (isFinished) { throw new IllegalStateException("get() has already been called"); @@ -54,6 +60,12 @@ public void add(ExponentialHistogram toAdd) { buffer = temp; } + /** + * Returns the merged histogram. + * Must not be called multiple times. + * + * @return the merged histogram + */ public ExponentialHistogram get() { if (isFinished) { throw new IllegalStateException("get() has already been called"); @@ -64,7 +76,8 @@ public ExponentialHistogram get() { // TODO: this algorithm is very efficient if b has roughly as many buckets as a // However, if b is much smaller we still have to iterate over all buckets of a which is very wasteful - // This can be optimized by buffering multiple histograms to accumulate first, then in O(log(b)) turn them into a single, merged histogram + // This can be optimized by buffering multiple histograms to accumulate first, + // then in O(log(b)) turn them into a single, merged histogram // (b is the number of buffered buckets) private void merge(FixedCapacityExponentialHistogram output, ExponentialHistogram a, ExponentialHistogram b) { @@ -78,9 +91,9 @@ private void merge(FixedCapacityExponentialHistogram output, ExponentialHistogra output.setZeroBucket(zeroBucket); - // we will attempt to bring everything to the scale of A - // this might involve increasing the scale for B, which in turn would increase the indices - // we need to make sure to not exceed MAX_INDEX / MIN_INDEX for those in this case + // We attempt to bring everything to the scale of A. 
+ // This might involve increasing the scale for B, which would increase its indices. + // We need to ensure that we do not exceed MAX_INDEX / MIN_INDEX in this case. int targetScale = a.scale(); if (targetScale > b.scale()) { if (posBucketsB.hasNext()) { @@ -97,9 +110,9 @@ private void merge(FixedCapacityExponentialHistogram output, ExponentialHistogra } } - // Now we are sure that everything fits numerically into targetScale - // however, we might exceed our limit for the total number of buckets - // therefore we try the merging optimistically, and if we fail we reduce the target scale accordingly to make everything fit + // Now we are sure that everything fits numerically into targetScale. + // However, we might exceed our limit for the total number of buckets. + // Therefore, we try the merge optimistically. If we fail, we reduce the target scale to make everything fit. MergingBucketIterator positiveMerged = new MergingBucketIterator(posBucketsA.copy(), posBucketsB.copy(), targetScale); MergingBucketIterator negativeMerged = new MergingBucketIterator(negBucketsA.copy(), negBucketsB.copy(), targetScale); @@ -110,7 +123,7 @@ private void merge(FixedCapacityExponentialHistogram output, ExponentialHistogra overflowCount += putBuckets(output, positiveMerged, true, downscaleStats); if (overflowCount > 0) { - // UDD-sketch approach: we decrease the scale and retry + // UDD-sketch approach: decrease the scale and retry. 
int reduction = downscaleStats.getRequiredScaleReductionToReduceBucketCountBy(overflowCount); targetScale -= reduction; output.resetBuckets(targetScale); diff --git a/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ExponentialHistogramQuantile.java b/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ExponentialHistogramQuantile.java index 711d2dfc191a9..0912299ddb307 100644 --- a/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ExponentialHistogramQuantile.java +++ b/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ExponentialHistogramQuantile.java @@ -10,19 +10,20 @@ package org.elasticsearch.exponentialhistogram; /** - * Quantile estimation for {@link ExponentialHistogram}s. + * Provides quantile estimation for {@link ExponentialHistogram} instances. */ public class ExponentialHistogramQuantile { /** - * Provides a quantile for the distribution represented by the given histogram. + * Estimates a quantile for the distribution represented by the given histogram. * - * It returns the value of the element at rank {@code max(0, min( n-1, (quantile * (n+1))-1))}, where rank starts at 0. - * If that value is fractional, we linearly interpolate based on the fraction the values of the two neighboring ranks. + * It returns the value of the element at rank {@code max(0, min(n - 1, (quantile * (n + 1)) - 1))}, where n is the total number of + * values and rank starts at 0. If the rank is fractional, the result is linearly interpolated from the values of the two + * neighboring ranks. 
* - * @param histo the histogram representing the distribution - * @param quantile the quantile to query, in the range [0,1] - * @return NaN if the histogram is empty, otherwise the quantile + * @param histo the histogram representing the distribution + * @param quantile the quantile to query, in the range [0, 1] + * @return the estimated quantile value, or {@link Double#NaN} if the histogram is empty */ public static double getQuantile(ExponentialHistogram histo, double quantile) { if (quantile < 0 || quantile > 1) { @@ -35,7 +36,7 @@ public static double getQuantile(ExponentialHistogram histo, double quantile) { long totalCount = zeroCount + negCount + posCount; if (totalCount == 0) { - // Can't compute quantile on empty histogram + // Can't compute quantile on an empty histogram return Double.NaN; } @@ -44,9 +45,13 @@ public static double getQuantile(ExponentialHistogram histo, double quantile) { long upperRank = (long) Math.ceil(exactRank); double upperFactor = exactRank - lowerRank; - // TODO: if we want more performance here, we could iterate the buckets once instead of twice - return getElementAtRank(histo, lowerRank, negCount, zeroCount) * ( 1 - upperFactor) - +getElementAtRank(histo, upperRank, negCount, zeroCount) * upperFactor; + // TODO: This can be optimized to iterate over the buckets once instead of twice. 
+ return getElementAtRank(histo, lowerRank, negCount, zeroCount) * (1 - upperFactor) + getElementAtRank( + histo, + upperRank, + negCount, + zeroCount + ) * upperFactor; } private static double getElementAtRank(ExponentialHistogram histo, long rank, long negCount, long zeroCount) { @@ -68,7 +73,7 @@ private static double getBucketMidpointForRank(ExponentialHistogram.BucketIterat } buckets.advance(); } - throw new IllegalStateException("buckets contain in total less elements than the desired rank"); + throw new IllegalStateException("The total number of elements in the buckets is less than the desired rank."); } private static long getTotalCount(ExponentialHistogram.BucketIterator buckets) { diff --git a/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ExponentialScaleUtils.java b/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ExponentialScaleUtils.java index efef70d513a7f..a0f21790e8e89 100644 --- a/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ExponentialScaleUtils.java +++ b/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ExponentialScaleUtils.java @@ -11,86 +11,84 @@ import static org.elasticsearch.exponentialhistogram.ExponentialHistogram.MAX_INDEX; import static org.elasticsearch.exponentialhistogram.ExponentialHistogram.MAX_INDEX_BITS; -import static org.elasticsearch.exponentialhistogram.ExponentialHistogram.MAX_SCALE; import static org.elasticsearch.exponentialhistogram.ExponentialHistogram.MIN_INDEX; import static org.elasticsearch.exponentialhistogram.ExponentialHistogram.MIN_SCALE; /** - * Utils for working with indices and scales of exponential bucket histograms. + * A collection of utility methods for working with indices and scales of exponential bucket histograms. 
*/ public class ExponentialScaleUtils { private static final double LN_2 = Math.log(2); /** - * Only visible for testing, the test ensures that this table is up-to-date. + * This table is visible for testing to ensure it is up-to-date. *
- * For each scale from {@link ExponentialHistogram#MIN_SCALE} to {@link ExponentialHistogram#MAX_SCALE} - * the table contains a pre-computed constant for performing up-scaling of bucket indices. + * For each scale from {@link ExponentialHistogram#MIN_SCALE} to {@link ExponentialHistogram#MAX_SCALE}, + * the table contains a pre-computed constant for up-scaling bucket indices. * The constant is computed using the following formula: - * (1 + 2^scale * ( 1 - log2(1 + 2^(2^-scale)))) + * {@code (1 + 2^scale * (1 - log2(1 + 2^(2^-scale))))} */ - static final double[] SCALE_UP_CONSTANT_TABLE = new double[]{ - 4.8828125E-4, - 9.765625E-4, - 0.001953125, - 0.00390625, - 0.0078125, - 0.015625, - 0.03124999998950301, - 0.06249862414928998, - 0.12429693135076524, - 0.22813428968741514, - 0.33903595255631885, - 0.4150374992788438, - 0.45689339367277604, - 0.47836619809201575, - 0.4891729613112115, - 0.49458521106164327, - 0.497292446757125, - 0.4986462035295225, - 0.4993230992835585, - 0.4996615493316266, - 0.49983077462704417, - 0.49991538730867596, - 0.4999576936537322, - 0.4999788468267904, - 0.4999894234133857, - 0.4999947117066917, - 0.4999973558533457, - 0.49999867792667285, - 0.4999993389633364, - 0.4999996694816682, - 0.4999998347408341, - 0.49999991737041705, - 0.4999999586852085, - 0.49999997934260426, - 0.49999998967130216, - 0.49999999483565105, - 0.4999999974178255, - 0.49999999870891276, - 0.4999999993544564, - 0.4999999996772282, - 0.4999999998386141, - 0.49999999991930705, - 0.49999999995965355, - 0.49999999997982675, - 0.4999999999899134, - 0.4999999999949567, - 0.49999999999747835, - 0.4999999999987392, - 0.49999999999936956, - 0.4999999999996848 - }; + static final double[] SCALE_UP_CONSTANT_TABLE = new double[] { + 4.8828125E-4, + 9.765625E-4, + 0.001953125, + 0.00390625, + 0.0078125, + 0.015625, + 0.03124999998950301, + 0.06249862414928998, + 0.12429693135076524, + 0.22813428968741514, + 0.33903595255631885, + 0.4150374992788438, + 0.45689339367277604, + 
0.47836619809201575, + 0.4891729613112115, + 0.49458521106164327, + 0.497292446757125, + 0.4986462035295225, + 0.4993230992835585, + 0.4996615493316266, + 0.49983077462704417, + 0.49991538730867596, + 0.4999576936537322, + 0.4999788468267904, + 0.4999894234133857, + 0.4999947117066917, + 0.4999973558533457, + 0.49999867792667285, + 0.4999993389633364, + 0.4999996694816682, + 0.4999998347408341, + 0.49999991737041705, + 0.4999999586852085, + 0.49999997934260426, + 0.49999998967130216, + 0.49999999483565105, + 0.4999999974178255, + 0.49999999870891276, + 0.4999999993544564, + 0.4999999996772282, + 0.4999999998386141, + 0.49999999991930705, + 0.49999999995965355, + 0.49999999997982675, + 0.4999999999899134, + 0.4999999999949567, + 0.49999999999747835, + 0.4999999999987392, + 0.49999999999936956, + 0.4999999999996848 }; /** - * Computes the new index for a bucket when adjusting the scale of the histogram by the given amount. - * Note that this method does not only support down-scaling (=reducing the scale), but also upscaling. - * When scaling up, it will provide the bucket containing the point of least error of the original bucket. + * Computes the new index for a bucket when adjusting the scale of the histogram. + * This method supports both down-scaling (reducing the scale) and up-scaling. + * When up-scaling, it returns the bucket containing the point of least error of the original bucket. 
* - * @param index the current bucket index to be upscaled - * @param currentScale the current scale - * @param scaleAdjustment the adjustment to make, the new scale will be currentScale + scaleAdjustment + * @param index the current bucket index to be adjusted + * @param currentScale the current scale + * @param scaleAdjustment the adjustment to make; the new scale will be {@code currentScale + scaleAdjustment} * @return the index of the bucket in the new scale */ static long adjustScale(long index, int currentScale, int scaleAdjustment) { @@ -98,21 +96,27 @@ static long adjustScale(long index, int currentScale, int scaleAdjustment) { return index >> -scaleAdjustment; } else { // When scaling up, we want to return the bucket containing the point of least relative error. - // This bucket index can be computed as (index << adjustment) + offset - // Hereby offset is a constant which does not depend on the index, but only on the scale and adjustment - // The mathematically correct formula for offset is as follows: - // 2^adjustment * (1 + 2^currentScale * ( 1 - log2(1 + 2^(2^-scale)))) - // This is hard to compute in double precision, as it causes rounding errors, also it is quite expensive - // Therefore we precompute (1 + 2^currentScale * ( 1 - log2(1 + 2^(2^-scale)))) and store it - // in SCALE_UP_CONSTANT_TABLE for each scale + // This bucket index can be computed as (index << adjustment) + offset. + // The offset is a constant that depends only on the scale and adjustment, not the index. + // The mathematically correct formula for the offset is: + // 2^adjustment * (1 + 2^currentScale * (1 - log2(1 + 2^(2^-currentScale)))) + // This is hard to compute with double-precision floating-point numbers due to rounding errors and is also expensive. + // Therefore, we precompute (1 + 2^currentScale * (1 - log2(1 + 2^(2^-currentScale)))) and store it + // in SCALE_UP_CONSTANT_TABLE for each scale. 
double offset = Math.scalb(SCALE_UP_CONSTANT_TABLE[currentScale - MIN_SCALE], scaleAdjustment); return (index << scaleAdjustment) + (long) Math.floor(offset); } } - /** - * Equivalent to mathematically correct comparison of the lower bucket boundaries of the given buckets + * Compares the lower boundaries of two buckets, which may have different scales. + * This is equivalent to a mathematically correct comparison of the lower bucket boundaries. + * + * @param idxA the index of the first bucket + * @param scaleA the scale of the first bucket + * @param idxB the index of the second bucket + * @param scaleB the scale of the second bucket + * @return a negative integer, zero, or a positive integer as the first bucket's lower boundary is less than, equal to, or greater than the second bucket's lower boundary */ public static int compareLowerBoundaries(long idxA, int scaleA, long idxB, int scaleB) { if (scaleA > scaleB) { @@ -122,10 +126,10 @@ public static int compareLowerBoundaries(long idxA, int scaleA, long idxB, int s int shifts = scaleB - scaleA; int maxScaleAdjustment = getMaximumScaleIncrease(idxA); if (maxScaleAdjustment < shifts) { - // we would overflow if we adjust A to the scale of B - // so if A is negative, scaling would produce a number less than Long.MIN_VALUE, therefore it is definitely smaller than B - // if A is positive, scaling would produce a number bigger than Long.MAX_VALUE, therefore it is definitely bigger than B - // if A is zero => shifting and therefore scale adjustment would not have any effect + // We would overflow if we adjusted A to the scale of B. + // If A is negative, scaling would produce a number less than Long.MIN_VALUE, so it is smaller than B. + // If A is positive, scaling would produce a number greater than Long.MAX_VALUE, so it is larger than B. + // If A is zero, shifting and scale adjustment have no effect. 
if (idxA == 0) { return Long.compare(0, idxB); } else { @@ -138,8 +142,11 @@ public static int compareLowerBoundaries(long idxA, int scaleA, long idxB, int s } /** - * Returns the maximum permissible scale-increase which does not cause an overflow - * of the index. + * Returns the maximum permissible scale increase that does not cause the index to grow out + * of the [{@link ExponentialHistogram#MIN_INDEX}, {@link ExponentialHistogram#MAX_INDEX}] range. + * + * @param index the index to check + * @return the maximum permissible scale increase */ public static int getMaximumScaleIncrease(long index) { if (index < MIN_INDEX || index > MAX_INDEX) { @@ -151,24 +158,38 @@ public static int getMaximumScaleIncrease(long index) { return Long.numberOfLeadingZeros(index) - (64 - MAX_INDEX_BITS); } + /** + * Returns the upper boundary of the bucket with the given index and scale. + * + * @param index the index of the bucket + * @param scale the scale of the bucket + * @return the upper boundary of the bucket + */ public static double getUpperBucketBoundary(long index, int scale) { return getLowerBucketBoundary(index + 1, scale); } + /** + * Returns the lower boundary of the bucket with the given index and scale. + * + * @param index the index of the bucket + * @param scale the scale of the bucket + * @return the lower boundary of the bucket + */ public static double getLowerBucketBoundary(long index, int scale) { double inverseFactor = Math.scalb(LN_2, -scale); return Math.exp(inverseFactor * index); } /** - * For a bucket with the given index, computes the point x in the bucket so that - * (x - l) / l equals (u - x) / u where l is the lower bucket boundary and where u - * is the upper bucket boundary. + * For a bucket with the given index, computes the point {@code x} in the bucket such that + * {@code (x - l) / l} equals {@code (u - x) / u}, where {@code l} is the lower bucket boundary and {@code u} + * is the upper bucket boundary. *
- * In other words we select the point in the bucket which is guaranteed to have the least relative error towards any point in the bucket. + * In other words, we select the point in the bucket that has the least relative error with respect to any other point in the bucket. * - * @param bucketIndex the bucket index - * @param scale the scale of the histogram + * @param bucketIndex the index of the bucket + * @param scale the scale of the bucket * @return the point of least relative error */ public static double getPointOfLeastRelativeError(long bucketIndex, int scale) { @@ -178,17 +199,20 @@ public static double getPointOfLeastRelativeError(long bucketIndex, int scale) { } /** - * Provides the index of the bucket of the exponential histogram with the given scale - * containing the provided value. + * Provides the index of the bucket of the exponential histogram with the given scale that contains the provided value. + * + * @param value the value to find the bucket for + * @param scale the scale of the histogram + * @return the index of the bucket */ public static long computeIndex(double value, int scale) { return Indexing.computeIndex(value, scale); } /** - * The code in this class has been copied and slightly adapted from the - * OpenTelemetry Base2ExponentialHistogramIndexer implementation - * licensed under Apache License 2.0. + * The code in this class was copied and slightly adapted from the + * OpenTelemetry Base2ExponentialHistogramIndexer implementation, + * licensed under the Apache License 2.0. 
*/ private static class Indexing { @@ -261,5 +285,4 @@ private static long mapToIndexScaleZero(double value) { return ieeeExponent; } } - } diff --git a/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/FixedCapacityExponentialHistogram.java b/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/FixedCapacityExponentialHistogram.java index 3cde0f299a59a..38ba66b7a2012 100644 --- a/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/FixedCapacityExponentialHistogram.java +++ b/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/FixedCapacityExponentialHistogram.java @@ -12,17 +12,17 @@ import java.util.OptionalLong; /** - * Implementation of a mutable {@link ExponentialHistogram} with a sparse, array-backed representation. + * An implementation of a mutable {@link ExponentialHistogram} with a sparse, array-backed representation. *
- * Consumers must ensure that if the histogram is mutated, all previously acquired @{@link org.elasticsearch.exponentialhistogram.ExponentialHistogram.BucketIterator} - * must not be used anymore. + * Consumers must ensure that if the histogram is mutated, all previously acquired {@link ExponentialHistogram.BucketIterator} + * instances are no longer used. */ public final class FixedCapacityExponentialHistogram implements ExponentialHistogram { // TODO: maybe switch to BigArrays? - // These arrays represent both the positive and the negative buckets - // They store first all negative buckets in ascending index order, followed by the positive buckets in ascending index order + // These arrays represent both the positive and the negative buckets. + // They store all negative buckets first, in ascending index order, followed by all positive buckets, also in ascending index order. private final long[] bucketIndices; private final long[] bucketCounts; @@ -34,9 +34,9 @@ public final class FixedCapacityExponentialHistogram implements ExponentialHisto /** * Creates an empty histogram with the given capacity and a {@link ZeroBucket#minimalEmpty()} zero bucket. - * The scale is set to the maximum possible precisions ({@link #MAX_SCALE}). + * The scale is initialized to the maximum possible precision ({@link #MAX_SCALE}). * - * @param bucketCapacity the maximum number of positive and negative buckets this histogram can hold in total. + * @param bucketCapacity the maximum total number of positive and negative buckets this histogram can hold. */ public FixedCapacityExponentialHistogram(int bucketCapacity) { bucketIndices = new long[bucketCapacity]; @@ -45,7 +45,7 @@ public FixedCapacityExponentialHistogram(int bucketCapacity) { } /** - * Resets this histogram so it has the same state as a newly constructed one with the same capacity. + * Resets this histogram to the same state as a newly constructed one with the same capacity. 
*/ public void reset() { setZeroBucket(ZeroBucket.minimalEmpty()); @@ -53,11 +53,11 @@ public void reset() { } /** - * Removes all positive and negative buckets of this histogram. Sets the scale to the given value. + * Removes all positive and negative buckets from this histogram and sets the scale to the given value. */ public void resetBuckets(int scale) { if (scale > MAX_SCALE || scale < MIN_SCALE) { - throw new IllegalArgumentException("scale must be in range ["+MIN_SCALE+".."+MAX_SCALE+"]"); + throw new IllegalArgumentException("scale must be in range [" + MIN_SCALE + ".." + MAX_SCALE + "]"); } negativeBucketCount = 0; positiveBucketCount = 0; @@ -71,41 +71,41 @@ public ZeroBucket zeroBucket() { /** * Replaces the zero bucket of this histogram with the given one. - * Callers must ensure that the given @{@link ZeroBucket} does not overlap any of the positive or negative buckets of this histogram. + * Callers must ensure that the given {@link ZeroBucket} does not overlap with any of the positive or negative buckets of this histogram. */ public void setZeroBucket(ZeroBucket zeroBucket) { this.zeroBucket = zeroBucket; } /** - * Attempts to add a bucket to the range of positive or negative buckets of this histogram. + * Attempts to add a bucket to the positive or negative range of this histogram. *
- * Callers have to adhere to the following rules: + * Callers must adhere to the following rules: *
    - *
  • All buckets of the negative range must be provided before the first one from the positive range
  • - *
  • For both the negative and positive range, buckets must be provided in ascending index order
  • - *
  • It is not allowed to provide the same bucket more than once
  • - *
  • It is not allowed to add empty buckets
  • + *
  • All buckets from the negative range must be provided before the first one from the positive range.
  • + *
  • For both the negative and positive ranges, buckets must be provided in ascending index order.
  • + *
  • It is not allowed to provide the same bucket more than once.
  • + *
  • It is not allowed to add empty buckets (count <= 0).
  • *
* - * If any of the rules above are violated, this call fails with an exception. - * In contrast if the bucket cannot be added because the maximum capacity has been reached, the call will not modify the state - * of this histogram and return false. + * If any of these rules are violated, this call will fail with an exception. + * If the bucket cannot be added because the maximum capacity has been reached, the call will not modify the state + * of this histogram and will return {@code false}. * - * @param index the index of the bucket to add - * @param count the count to associate with the given bucket - * @param isPositive true, if the bucket to add belongs to the positive range, false if it belongs to the negative range - * @return true if the bucket was added, false if it could not be added due to insufficient capacity + * @param index the index of the bucket to add + * @param count the count to associate with the given bucket + * @param isPositive {@code true} if the bucket belongs to the positive range, {@code false} if it belongs to the negative range + * @return {@code true} if the bucket was added, {@code false} if it could not be added due to insufficient capacity */ public boolean tryAddBucket(long index, long count, boolean isPositive) { if (index < MIN_INDEX || index > MAX_INDEX) { throw new IllegalArgumentException("index must be in range [" + MIN_INDEX + ".." 
+ MAX_INDEX + "]"); } if (isPositive == false && positiveBucketCount > 0) { - throw new IllegalArgumentException("Cannot add negative buckets after a positive bucket was added"); + throw new IllegalArgumentException("Cannot add negative buckets after a positive bucket has been added"); } if (count <= 0) { - throw new IllegalArgumentException("Cannot add an empty bucket"); + throw new IllegalArgumentException("Cannot add an empty or negative bucket"); } int slot = negativeBucketCount + positiveBucketCount; if (slot >= bucketCounts.length) { @@ -114,13 +114,13 @@ public boolean tryAddBucket(long index, long count, boolean isPositive) { bucketIndices[slot] = index; bucketCounts[slot] = count; if (isPositive) { - if (positiveBucketCount > 0 && bucketIndices[slot - 1] > index) { - throw new IllegalStateException("Buckets must be added in ascending index order!"); + if (positiveBucketCount > 0 && bucketIndices[slot - 1] >= index) { + throw new IllegalStateException("Buckets must be added in strictly ascending index order"); } positiveBucketCount++; } else { - if (negativeBucketCount > 0 && bucketIndices[slot - 1] > index) { - throw new IllegalStateException("Buckets must be added in ascending index order!"); + if (negativeBucketCount > 0 && bucketIndices[slot - 1] >= index) { + throw new IllegalStateException("Buckets must be added in strictly ascending index order"); } negativeBucketCount++; } @@ -199,9 +199,8 @@ public CopyableBucketIterator copy() { private void ensureEndNotReached() { if (hasNext() == false) { - throw new IllegalStateException("No more buckets"); + throw new IllegalStateException("Iterator has no more buckets"); } } } - } diff --git a/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/MergingBucketIterator.java b/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/MergingBucketIterator.java index 7e3d2458888d7..def9a81952b0c 100644 --- 
a/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/MergingBucketIterator.java +++ b/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/MergingBucketIterator.java @@ -10,8 +10,7 @@ package org.elasticsearch.exponentialhistogram; /** - * Iterates over two sets of buckets in parallel, bringing them to the same scale - * and merging buckets which exist in both. + * An iterator that merges two bucket iterators, aligning them to a common scale and combining buckets with the same index. */ final class MergingBucketIterator implements ExponentialHistogram.BucketIterator { @@ -22,6 +21,13 @@ final class MergingBucketIterator implements ExponentialHistogram.BucketIterator private long currentIndex; private long currentCount; + /** + * Creates a new merging iterator. + * + * @param itA the first iterator to merge + * @param itB the second iterator to merge + * @param targetScale the histogram scale to which both iterators should be aligned + */ MergingBucketIterator(ExponentialHistogram.BucketIterator itA, ExponentialHistogram.BucketIterator itB, int targetScale) { this.itA = new ScaleAdjustingBucketIterator(itA, targetScale); this.itB = new ScaleAdjustingBucketIterator(itB, targetScale); @@ -85,7 +91,7 @@ public int scale() { private void assertEndNotReached() { if (endReached) { - throw new IllegalStateException("No more buckets"); + throw new IllegalStateException("Iterator has no more buckets"); } } } diff --git a/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ScaleAdjustingBucketIterator.java b/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ScaleAdjustingBucketIterator.java index a619fcd70d482..82f3141fe221f 100644 --- a/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ScaleAdjustingBucketIterator.java +++ 
b/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ScaleAdjustingBucketIterator.java @@ -12,9 +12,8 @@ import static org.elasticsearch.exponentialhistogram.ExponentialScaleUtils.adjustScale; /** - * Iterates over buckets while also adjusting the scale. - * When scaling down, this can cause multiple buckets to collapse into a single one. - * This iterator ensures that they are properly merged in this case. + * An iterator that wraps another bucket iterator and adjusts its scale. + * When scaling down, multiple buckets can collapse into a single one. This iterator ensures they are merged correctly. */ final class ScaleAdjustingBucketIterator implements ExponentialHistogram.BucketIterator { @@ -25,6 +24,12 @@ final class ScaleAdjustingBucketIterator implements ExponentialHistogram.BucketI private long currentCount; boolean hasNextValue; + /** + * Creates a new scale-adjusting iterator. + * + * @param delegate the iterator to wrap + * @param targetScale the target scale for the new iterator + */ ScaleAdjustingBucketIterator(ExponentialHistogram.BucketIterator delegate, int targetScale) { this.delegate = delegate; scaleAdjustment = targetScale - delegate.scale(); @@ -67,7 +72,7 @@ public void advance() { private void assertEndNotReached() { if (hasNextValue == false) { - throw new IllegalStateException("no more buckets available"); + throw new IllegalStateException("Iterator has no more buckets"); } } diff --git a/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ZeroBucket.java b/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ZeroBucket.java index 803dc92057a87..d1e8592240224 100644 --- a/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ZeroBucket.java +++ b/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ZeroBucket.java @@ -9,24 +9,49 @@ package org.elasticsearch.exponentialhistogram; +import static 
org.elasticsearch.exponentialhistogram.ExponentialHistogram.MAX_SCALE; import static org.elasticsearch.exponentialhistogram.ExponentialHistogram.MIN_INDEX; +import static org.elasticsearch.exponentialhistogram.ExponentialHistogram.MIN_SCALE; import static org.elasticsearch.exponentialhistogram.ExponentialScaleUtils.compareLowerBoundaries; import static org.elasticsearch.exponentialhistogram.ExponentialScaleUtils.computeIndex; import static org.elasticsearch.exponentialhistogram.ExponentialScaleUtils.getLowerBucketBoundary; -import static org.elasticsearch.exponentialhistogram.ExponentialHistogram.MAX_SCALE; +/** + * Represents the bucket for values around zero in an exponential histogram. + * The range of this bucket is {@code [-zeroThreshold, +zeroThreshold]}. + * + * @param index The index used with the scale to determine the zero threshold. + * @param scale The scale used with the index to determine the zero threshold. + * @param count The number of values in the zero bucket. + */ public record ZeroBucket(long index, int scale, long count) { - private static final ZeroBucket MINIMAL_EMPTY = new ZeroBucket(MIN_INDEX, Integer.MIN_VALUE / 256, 0); + // A singleton for an empty zero bucket with the smallest possible threshold. + private static final ZeroBucket MINIMAL_EMPTY = new ZeroBucket(MIN_INDEX, MIN_SCALE, 0); + /** + * Creates a new zero bucket with a specific threshold and count. + * + * @param zeroThreshold The threshold defining the bucket's range [-zeroThreshold, +zeroThreshold]. + * @param count The number of values in the bucket. + */ public ZeroBucket(double zeroThreshold, long count) { this(computeIndex(zeroThreshold, MAX_SCALE) + 1, MAX_SCALE, count); } + /** + * @return A singleton instance of an empty zero bucket with the smallest possible threshold. + */ public static ZeroBucket minimalEmpty() { return MINIMAL_EMPTY; } + /** + * Creates a zero bucket with the smallest possible threshold and a given count. 
+ * + * @param count The number of values in the bucket. + * @return A new {@link ZeroBucket}. + */ public static ZeroBucket minimalWithCount(long count) { if (count == 0) { return MINIMAL_EMPTY; @@ -36,16 +61,21 @@ public static ZeroBucket minimalWithCount(long count) { } /** - * Merges this zero-bucket with a given other one: - * * If the other zero-bucket is empty, the current one is returned unchanged - * * Otherwise the zero-threshold is increased if required and the counts are summed up + * Merges this zero bucket with another one. + *
    + *
  • If the other zero bucket is empty, this instance is returned unchanged.
  • + *
  • Otherwise, the zero threshold is increased if necessary (by taking the maximum of the two), and the counts are summed.
  • + *
+ * + * @param other The other zero bucket to merge with. + * @return A new {@link ZeroBucket} representing the merged result. */ public ZeroBucket merge(ZeroBucket other) { if (other.count == 0) { return this; } else { long totalCount = count + other.count; - // both are populate, we need to use the higher zero-threshold + // Both are populated, so we need to use the higher zero-threshold. if (this.compareZeroThreshold(other) >= 0) { return new ZeroBucket(index, scale, totalCount); } else { @@ -54,6 +84,13 @@ public ZeroBucket merge(ZeroBucket other) { } } + /** + * Collapses all buckets from the given iterators whose lower boundaries are smaller than the zero threshold. + * The iterators are advanced to point at the first, non-collapsed bucket. + * + * @param bucketIterators The iterators whose buckets may be collapsed. + * @return A potentially updated {@link ZeroBucket} with the collapsed buckets' counts and an adjusted threshold. + */ public ZeroBucket collapseOverlappingBuckets(ExponentialHistogram.BucketIterator... bucketIterators) { ZeroBucket current = this; ZeroBucket previous; @@ -66,17 +103,29 @@ public ZeroBucket collapseOverlappingBuckets(ExponentialHistogram.BucketIterator return current; } + /** + * Compares the zero threshold of this bucket with another one. + * + * @param other The other zero bucket to compare against. + * @return A negative integer, zero, or a positive integer if this bucket's threshold is less than, equal to, or greater than the other's. + */ public int compareZeroThreshold(ZeroBucket other) { return compareLowerBoundaries(index, scale, other.index, other.scale); } + /** + * @return The value of the zero threshold. + */ public double zeroThreshold() { return getLowerBucketBoundary(index, scale); } /** - * Collapses all buckets from the given iterator whose lower boundary is smaller than the zero threshold. + * Collapses all buckets from the given iterator whose lower boundaries are smaller than the zero threshold. 
* The iterator is advanced to point at the first, non-collapsed bucket. + * + * @param buckets The iterator whose buckets may be collapsed. + * @return A potentially updated {@link ZeroBucket} with the collapsed buckets' counts and an adjusted threshold. */ public ZeroBucket collapseOverlappingBuckets(ExponentialHistogram.BucketIterator buckets) { @@ -94,7 +143,7 @@ public ZeroBucket collapseOverlappingBuckets(ExponentialHistogram.BucketIterator // +1 because we need to adjust the zero threshold to the upper boundary of the collapsed bucket long collapsedUpperBoundIndex = Math.addExact(highestCollapsedIndex, 1); if (compareLowerBoundaries(index, scale, collapsedUpperBoundIndex, buckets.scale()) >= 0) { - // we still have a larger zero-threshold than the largest collapsed bucket's upper boundary + // Our current zero-threshold is larger than the upper boundary of the largest collapsed bucket, so we keep it. return new ZeroBucket(index, scale, newZeroCount); } else { return new ZeroBucket(collapsedUpperBoundIndex, buckets.scale(), newZeroCount); diff --git a/libs/exponential-histogram/src/test/java/org/elasticsearch/exponentialhistogram/ExponentialHistogramGeneratorTests.java b/libs/exponential-histogram/src/test/java/org/elasticsearch/exponentialhistogram/ExponentialHistogramGeneratorTests.java index 606acda310c55..f0c13b3190aca 100644 --- a/libs/exponential-histogram/src/test/java/org/elasticsearch/exponentialhistogram/ExponentialHistogramGeneratorTests.java +++ b/libs/exponential-histogram/src/test/java/org/elasticsearch/exponentialhistogram/ExponentialHistogramGeneratorTests.java @@ -17,7 +17,7 @@ public class ExponentialHistogramGeneratorTests extends ESTestCase { public void testVeryLargeValue() { - double value = Double.MAX_VALUE/10; + double value = Double.MAX_VALUE / 10; ExponentialHistogram histo = ExponentialHistogramGenerator.createFor(value); long index = histo.positiveBuckets().peekIndex(); diff --git 
a/libs/exponential-histogram/src/test/java/org/elasticsearch/exponentialhistogram/ExponentialScaleUtilsTests.java b/libs/exponential-histogram/src/test/java/org/elasticsearch/exponentialhistogram/ExponentialScaleUtilsTests.java index ce47c73acfef8..34d20d099532f 100644 --- a/libs/exponential-histogram/src/test/java/org/elasticsearch/exponentialhistogram/ExponentialScaleUtilsTests.java +++ b/libs/exponential-histogram/src/test/java/org/elasticsearch/exponentialhistogram/ExponentialScaleUtilsTests.java @@ -71,7 +71,7 @@ public void testRandomValueIndexing() { // generate values in the range 10^-100 to 10^100 double exponent = rnd.nextDouble() * 200 - 100; double testValue = Math.pow(10, exponent); - int scale = rnd.nextInt(MIN_SCALE/2, MAX_SCALE/2); + int scale = rnd.nextInt(MIN_SCALE / 2, MAX_SCALE / 2); long index = computeIndex(testValue, scale); double lowerBound = getLowerBucketBoundary(index, scale); @@ -160,13 +160,12 @@ public void testUpscalingAccuracy() { for (int i = 0; i < 10_000; i++) { int startScale = rnd.nextInt(minScale, maxScale); - int scaleIncrease = rnd.nextInt(1, maxScale-startScale + 1); + int scaleIncrease = rnd.nextInt(1, maxScale - startScale + 1); long index = MAX_INDEX >> scaleIncrease >> (int) (rnd.nextDouble() * (MAX_INDEX_BITS - scaleIncrease)); index = Math.max(1, index); index = (long) (rnd.nextDouble() * index) * (rnd.nextBoolean() ? 
1 : -1); - double midPoint = getPointOfLeastRelativeError(index, startScale); // limit the numeric range, otherwise we get rounding errors causing the test to fail while (midPoint > Math.pow(10, 10) || midPoint < Math.pow(10, -10)) { From a980c0e8866e6d265a81aea1c44ed30edb50fa87 Mon Sep 17 00:00:00 2001 From: Jonas Kunz Date: Tue, 15 Jul 2025 13:08:33 +0200 Subject: [PATCH 19/32] Readme bullet points --- libs/exponential-histogram/README.md | 34 ++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) create mode 100644 libs/exponential-histogram/README.md diff --git a/libs/exponential-histogram/README.md b/libs/exponential-histogram/README.md new file mode 100644 index 0000000000000..52fa96f0e2381 --- /dev/null +++ b/libs/exponential-histogram/README.md @@ -0,0 +1,34 @@ +* Implementation of merging and analysis algorithms for exponential histograms based on the [OpenTelemetry definition](https://opentelemetry.io/docs/specs/otel/metrics/data-model/#exponentialhistogram) +* Uses a sparse storage: Only populated buckets consume memory and count towards the bucket limit +* In contrast, the OpenTelemetry implementation uses a dense storage +* Dense storage allows for O(1) time for insertion of individual values, sparse requires O( log m) where m is the bucket capacity +* Sparse representation allows a more efficient storage and also for a simple merging algorithm with a runtime linear in the number of populated buckets +* Sparse storage can almost exactly represent distributions with less distinct values than bucket count, which allows us to use this implementation also for explicit bucket histograms + +# Merging algorithm + + * Works very similar to the merge-step of merge sort: We iterate simultaneously over buckets from both histograms, merging buckets as needed + * If the merged buckets exceed the configurable bucket count limit, we scale down as needed + * We respect the zero-threshold of the zero buckets. 
We merge the zero threshold from both histograms and collapse any overlapping buckets into the zero bucket + * In addition to not have single, malformed histograms drag down the accuracy, we also increase the scale of the histogram to aggregate if necessary (link to upscaling section) + +## Upscaling + + * We assume that all values in a bucket lie on a single point: the point of least relative error (TBD add definiton from code here) + * This allows us to increase the scale of histograms without increasing the bucket count. Buckets are simply mapped to the ones in the new scale containing the point of least relative error of the original buckets + * This can introduce a small error, as the original center might be moved a little, therefore we ensure that the upscaling happens at most once to not have the errors add-up + * The higher the amount of upscaling, the less the error (higher scale means smaller buckets, which in turn means we get a better fit around the original point of least relative error) + +# Distributions with fewer distinct values than the bucket count +* The sparse storage only requires memory linear to the total number of buckets, dense storage in needs to store the entire range of the smallest and biggest buckets. 
+* If we have at least as many buckets as we have distinct values to store in the histogram, we can almost exactly represent this distribution +* We can set the scale to the maximum supported value (so the buckets become the smallest) +* At the time of writing the maximum scale is 38, so the relative distance between the lower and upper bucket boundaries are (2^2(-38)) +* In otherwords : If we store for example a duration value of 10^15 nano seconds (= roughly 11.5 days), this value will be stored in a bucket which guarantees a relative error of at most 2^2(-38), so 2.5 microseconds in this case +* We can make use of this property to convert explicit bucket histograms (https://opentelemetry.io/docs/specs/otel/metrics/data-model/#histogram) to exponential ones by again assuming that all values in a bucket lie in a single point: + * For each explicit bucket, we take its point of least relative error and add it to the corresponding exponential histogram bucket with the corresponding count + * The open, upper and lower buckets including infinity will need a special treatment, but these are not useful for percentile estimates anyway +* This gives us a great solution for universally dealing with histograms: + * When merging exponential histograms generated from explicit ones, the result is exact as long as the number of distinct buckets from the original explicit bucket histograms does not exceed the exponential histogram bucket count + * As a result, the computed percentiles will be exact with only the error of the original conversion + * In addition this allows us to compute percentiles on mixed explicit bucket histograms or even mixing them with exponential ones by just using the exponential histogram algorithms From a46914a2db3f2818826452c78b0a81086064ce59 Mon Sep 17 00:00:00 2001 From: Jonas Kunz Date: Tue, 15 Jul 2025 14:04:46 +0200 Subject: [PATCH 20/32] Add readme --- libs/exponential-histogram/README.md | 93 ++++++++++++++++++---------- 1 file changed, 60 
insertions(+), 33 deletions(-) diff --git a/libs/exponential-histogram/README.md b/libs/exponential-histogram/README.md index 52fa96f0e2381..d7c37467bd4aa 100644 --- a/libs/exponential-histogram/README.md +++ b/libs/exponential-histogram/README.md @@ -1,34 +1,61 @@ -* Implementation of merging and analysis algorithms for exponential histograms based on the [OpenTelemetry definition](https://opentelemetry.io/docs/specs/otel/metrics/data-model/#exponentialhistogram) -* Uses a sparse storage: Only populated buckets consume memory and count towards the bucket limit -* In contrast, the OpenTelemetry implementation uses a dense storage -* Dense storage allows for O(1) time for insertion of individual values, sparse requires O( log m) where m is the bucket capacity -* Sparse representation allows a more efficient storage and also for a simple merging algorithm with a runtime linear in the number of populated buckets -* Sparse storage can almost exactly represent distributions with less distinct values than bucket count, which allows us to use this implementation also for explicit bucket histograms - -# Merging algorithm - - * Works very similar to the merge-step of merge sort: We iterate simultaneously over buckets from both histograms, merging buckets as needed - * If the merged buckets exceed the configurable bucket count limit, we scale down as needed - * We respect the zero-threshold of the zero buckets. We merge the zero threshold from both histograms and collapse any overlapping buckets into the zero bucket - * In addition to not have single, malformed histograms drag down the accuracy, we also increase the scale of the histogram to aggregate if necessary (link to upscaling section) - -## Upscaling - - * We assume that all values in a bucket lie on a single point: the point of least relative error (TBD add definiton from code here) - * This allows us to increase the scale of histograms without increasing the bucket count. 
Buckets are simply mapped to the ones in the new scale containing the point of least relative error of the original buckets - * This can introduce a small error, as the original center might be moved a little, therefore we ensure that the upscaling happens at most once to not have the errors add-up - * The higher the amount of upscaling, the less the error (higher scale means smaller buckets, which in turn means we get a better fit around the original point of least relative error) - -# Distributions with fewer distinct values than the bucket count -* The sparse storage only requires memory linear to the total number of buckets, dense storage in needs to store the entire range of the smallest and biggest buckets. -* If we have at least as many buckets as we have distinct values to store in the histogram, we can almost exactly represent this distribution -* We can set the scale to the maximum supported value (so the buckets become the smallest) -* At the time of writing the maximum scale is 38, so the relative distance between the lower and upper bucket boundaries are (2^2(-38)) -* In otherwords : If we store for example a duration value of 10^15 nano seconds (= roughly 11.5 days), this value will be stored in a bucket which guarantees a relative error of at most 2^2(-38), so 2.5 microseconds in this case -* We can make use of this property to convert explicit bucket histograms (https://opentelemetry.io/docs/specs/otel/metrics/data-model/#histogram) to exponential ones by again assuming that all values in a bucket lie in a single point: +This library provides an implementation of merging and analysis algorithms for exponential histograms based on the [OpenTelemetry definition](https://opentelemetry.io/docs/specs/otel/metrics/data-model/#exponentialhistogram). It is designed as a complementary tool to the OpenTelemetry SDK, focusing specifically on efficient histogram merging and accurate percentile estimation. 
+ +## Overview + +The library implements a sparse storage approach where only populated buckets consume memory and count towards the bucket limit. This differs from the OpenTelemetry implementation, which uses dense storage. While dense storage allows for O(1) time insertion of individual values, our sparse representation requires O(log m) time where m is the bucket capacity. However, the sparse representation enables more efficient storage and provides a simple merging algorithm with runtime linear in the number of populated buckets. In addition, this library also provides an array-backed sparse storage, ensuring cache efficiency. + +The sparse storage approach offers significant advantages for [distributions with fewer distinct values](#distributions-with-few-distinct-values) than the bucket count, allowing the library to achieve near-exact representation of such distributions. This makes it suitable not only for exponential histograms but also as a universal solution for handling explicit bucket histograms. + +## Merging Algorithm + +The merging algorithm works similarly to the merge-step of merge sort. +We simultaneously walk through the buckets of both histograms in order, merging them on the fly as needed. +If the total number of buckets in the end would exceed the bucket limit, we scale down as needed. + +Before we merge the buckets, we need to take care of the special zero-bucket and bring both histograms to the same scale. + +For the zero-bucket, we merge the zero threshold from both histograms and collapse any overlapping buckets into the resulting new zero bucket. + +In order to bring both histograms to the same scale, we can make adjustments in both directions: +We can increase or decrease the scale of histograms as needed. + +See the [upscaling section](#upscaling) for details on how the upscaling works. 
+Upscaling helps prevent the precision of the result histogram merged from many histograms from being dragged down to the lowest scale of a potentially misconfigured input histogram. For example, if a histogram is recorded with a too low zero threshold, this can result in a degraded scale when using dense histogram storage, even if the histogram only contains two points. + +### Upscaling + +In general, we assume that all values in a bucket lie on a single point: the point of least relative error. This is the point `x` in the bucket such that: + +``` +(x - l) / l = (u - x) / u +``` + +Where `l` is the lower bucket boundary and `u` is the upper bucket boundary. + +This assumption allows us to increase the scale of histograms without increasing the bucket count. Buckets are simply mapped to the ones in the new scale containing the point of least relative error of the original buckets. + +This can introduce a small error, as the original center might be moved slightly. Therefore, we ensure that the upscaling happens at most once to prevent errors from adding up. +The higher the amount of upscaling, the less the error (higher scale means smaller buckets, which in turn means we get a better fit around the original point of least relative error). + +## Distributions with few distinct values + +The sparse storage model only requires memory linear to the total number of buckets, while dense storage needs to store the entire range of the smallest and biggest buckets. + +This offers significant benefits for distributions with fewer distinct values: +If we have at least as many buckets as we have distinct values to store in the histogram, we can almost exactly represent this distribution. +This can be achieved by simply maintaining the scale at the maximum supported value (so the buckets become the smallest). +At the time of writing, the maximum scale is 38, so the relative distance between the lower and upper bucket boundaries is (2^2(-38)). 
+ +This is best explained with a concrete example: +If we store, for example, a duration value of 10^15 nano seconds (= roughly 11.5 days), this value will be stored in a bucket that guarantees a relative error of at most 2^2(-38), so 2.5 microseconds in this case. +As long as the number of values we insert is lower than the bucket count, we are guaranteed that no down-scaling happens: In contrast to dense storage, the scale does not depend on the spread between the smallest and largest bucket index. + +### Handling Explicit Bucket Histograms + +We can make use of this property to convert explicit bucket histograms (https://opentelemetry.io/docs/specs/otel/metrics/data-model/#histogram) to exponential ones by again assuming that all values in a bucket lie in a single point: * For each explicit bucket, we take its point of least relative error and add it to the corresponding exponential histogram bucket with the corresponding count - * The open, upper and lower buckets including infinity will need a special treatment, but these are not useful for percentile estimates anyway -* This gives us a great solution for universally dealing with histograms: - * When merging exponential histograms generated from explicit ones, the result is exact as long as the number of distinct buckets from the original explicit bucket histograms does not exceed the exponential histogram bucket count - * As a result, the computed percentiles will be exact with only the error of the original conversion - * In addition this allows us to compute percentiles on mixed explicit bucket histograms or even mixing them with exponential ones by just using the exponential histogram algorithms + * The open, upper, and lower buckets, including infinity, will need special treatment, but these are not useful for percentile estimates anyway + +This gives us a great solution for universally dealing with histograms: +When merging exponential histograms generated from explicit ones, the result is exact as long 
as the number of distinct buckets from the original explicit bucket histograms does not exceed the exponential histogram bucket count. As a result, the computed percentiles will be exact with only the error of the original conversion. +In addition, this allows us to compute percentiles on mixed explicit bucket histograms or even mix them with exponential ones by just using the exponential histogram algorithms. From fefd39bce8804f4b9c1e9c280c494b03c481f0cc Mon Sep 17 00:00:00 2001 From: Jonas Kunz Date: Tue, 15 Jul 2025 15:27:36 +0200 Subject: [PATCH 21/32] Add testcase verifying index limits are not exceeded on upscaling --- .../ExponentialHistogramMergerTests.java | 29 +++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/libs/exponential-histogram/src/test/java/org/elasticsearch/exponentialhistogram/ExponentialHistogramMergerTests.java b/libs/exponential-histogram/src/test/java/org/elasticsearch/exponentialhistogram/ExponentialHistogramMergerTests.java index e4feb10e52329..19f5134f4cc34 100644 --- a/libs/exponential-histogram/src/test/java/org/elasticsearch/exponentialhistogram/ExponentialHistogramMergerTests.java +++ b/libs/exponential-histogram/src/test/java/org/elasticsearch/exponentialhistogram/ExponentialHistogramMergerTests.java @@ -19,6 +19,9 @@ import java.util.stream.Collectors; import java.util.stream.IntStream; +import static org.elasticsearch.exponentialhistogram.ExponentialHistogram.MAX_INDEX; +import static org.elasticsearch.exponentialhistogram.ExponentialHistogram.MIN_INDEX; +import static org.elasticsearch.exponentialhistogram.ExponentialScaleUtils.adjustScale; import static org.hamcrest.Matchers.closeTo; import static org.hamcrest.Matchers.equalTo; @@ -89,6 +92,32 @@ public void testEmptyZeroBucketIgnored() { assertThat(posBuckets.hasNext(), equalTo(false)); } + public void testUpscalingDoesNotExceedIndexLimits() { + for (int i = 0; i < 4; i++) { + + boolean isPositive = i % 2 == 0; + boolean useMinIndex = i > 1; + + 
FixedCapacityExponentialHistogram histo = new FixedCapacityExponentialHistogram(2); + histo.resetBuckets(20); + + long index = useMinIndex ? MIN_INDEX / 2 : MAX_INDEX / 2; + + histo.tryAddBucket(index, 1, isPositive); + + ExponentialHistogramMerger merger = new ExponentialHistogramMerger(100); + merger.add(histo); + ExponentialHistogram result = merger.get(); + + assertThat(result.scale(), equalTo(21)); + if (isPositive) { + assertThat(result.positiveBuckets().peekIndex(), equalTo(adjustScale(index, 20, 1))); + } else { + assertThat(result.negativeBuckets().peekIndex(), equalTo(adjustScale(index, 20, 1))); + } + } + } + /** * Verify that the resulting histogram is independent of the order of elements and therefore merges performed. */ From ac804c77d78de2d77c38404c171994985af41d47 Mon Sep 17 00:00:00 2001 From: Jonas Kunz Date: Tue, 15 Jul 2025 15:49:45 +0200 Subject: [PATCH 22/32] Replaced upscaling floating point arithmetic with faster and more accurate long arithmetic --- .../ExponentialScaleUtils.java | 115 +++++++++--------- .../ExponentialScaleUtilsTests.java | 6 +- 2 files changed, 64 insertions(+), 57 deletions(-) diff --git a/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ExponentialScaleUtils.java b/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ExponentialScaleUtils.java index a0f21790e8e89..9cc10824b6adb 100644 --- a/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ExponentialScaleUtils.java +++ b/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ExponentialScaleUtils.java @@ -27,59 +27,59 @@ public class ExponentialScaleUtils { * For each scale from {@link ExponentialHistogram#MIN_SCALE} to {@link ExponentialHistogram#MAX_SCALE}, * the table contains a pre-computed constant for up-scaling bucket indices. 
* The constant is computed using the following formula: - * {@code (1 + 2^scale * (1 - log2(1 + 2^(2^-scale))))} + * {@code 2^63 * (1 + 2^scale * (1 - log2(1 + 2^(2^-scale))))} */ - static final double[] SCALE_UP_CONSTANT_TABLE = new double[] { - 4.8828125E-4, - 9.765625E-4, - 0.001953125, - 0.00390625, - 0.0078125, - 0.015625, - 0.03124999998950301, - 0.06249862414928998, - 0.12429693135076524, - 0.22813428968741514, - 0.33903595255631885, - 0.4150374992788438, - 0.45689339367277604, - 0.47836619809201575, - 0.4891729613112115, - 0.49458521106164327, - 0.497292446757125, - 0.4986462035295225, - 0.4993230992835585, - 0.4996615493316266, - 0.49983077462704417, - 0.49991538730867596, - 0.4999576936537322, - 0.4999788468267904, - 0.4999894234133857, - 0.4999947117066917, - 0.4999973558533457, - 0.49999867792667285, - 0.4999993389633364, - 0.4999996694816682, - 0.4999998347408341, - 0.49999991737041705, - 0.4999999586852085, - 0.49999997934260426, - 0.49999998967130216, - 0.49999999483565105, - 0.4999999974178255, - 0.49999999870891276, - 0.4999999993544564, - 0.4999999996772282, - 0.4999999998386141, - 0.49999999991930705, - 0.49999999995965355, - 0.49999999997982675, - 0.4999999999899134, - 0.4999999999949567, - 0.49999999999747835, - 0.4999999999987392, - 0.49999999999936956, - 0.4999999999996848 }; + static final long[] SCALE_UP_CONSTANT_TABLE = new long[] { + 4503599627370495L, + 9007199254740991L, + 18014398509481983L, + 36028797018963967L, + 72057594037927935L, + 144115188075855871L, + 288230376054894118L, + 576448062320457790L, + 1146436840887505800L, + 2104167428150631728L, + 3127054724296373505L, + 3828045265094622256L, + 4214097751025163417L, + 4412149414858430624L, + 4511824212543271281L, + 4561743405547877994L, + 4586713247558758689L, + 4599199449917992829L, + 4605442711287634239L, + 4608564361996858084L, + 4610125189854540715L, + 4610905604096266504L, + 4611295811256239977L, + 4611490914841115537L, + 4611588466634164420L, + 4611637242530765249L, + 
4611661630479075212L, + 4611673824453231387L, + 4611679921440309624L, + 4611682969933848761L, + 4611684494180618332L, + 4611685256304003118L, + 4611685637365695511L, + 4611685827896541707L, + 4611685923161964805L, + 4611685970794676354L, + 4611685994611032129L, + 4611686006519210016L, + 4611686012473298960L, + 4611686015450343432L, + 4611686016938865668L, + 4611686017683126786L, + 4611686018055257345L, + 4611686018241322624L, + 4611686018334355264L, + 4611686018380871584L, + 4611686018404129744L, + 4611686018415758824L, + 4611686018421573364L, + 4611686018424480634L }; /** * Computes the new index for a bucket when adjusting the scale of the histogram. @@ -95,16 +95,21 @@ static long adjustScale(long index, int currentScale, int scaleAdjustment) { if (scaleAdjustment <= 0) { return index >> -scaleAdjustment; } else { + if (scaleAdjustment > MAX_INDEX_BITS) { + throw new IllegalArgumentException("Scaling up more than " + MAX_INDEX_BITS + " does not make sense"); + } // When scaling up, we want to return the bucket containing the point of least relative error. // This bucket index can be computed as (index << adjustment) + offset. // The offset is a constant that depends only on the scale and adjustment, not the index. // The mathematically correct formula for the offset is: // 2^adjustment * (1 + 2^currentScale * (1 - log2(1 + 2^(2^-currentScale)))) // This is hard to compute with double-precision floating-point numbers due to rounding errors and is also expensive. - // Therefore, we precompute (1 + 2^currentScale * (1 - log2(1 + 2^(2^-currentScale)))) and store it + // Therefore, we precompute 2^63 * (1 + 2^currentScale * (1 - log2(1 + 2^(2^-currentScale)))) and store it // in SCALE_UP_CONSTANT_TABLE for each scale. 
- double offset = Math.scalb(SCALE_UP_CONSTANT_TABLE[currentScale - MIN_SCALE], scaleAdjustment); - return (index << scaleAdjustment) + (long) Math.floor(offset); + // This can then be converted to the correct offset by dividing with (2^(63-adjustment)), + // which is equivalent to a right shift with (63-adjustment) + long offset = SCALE_UP_CONSTANT_TABLE[currentScale - MIN_SCALE] >> (63 - scaleAdjustment); + return (index << scaleAdjustment) + offset; } } diff --git a/libs/exponential-histogram/src/test/java/org/elasticsearch/exponentialhistogram/ExponentialScaleUtilsTests.java b/libs/exponential-histogram/src/test/java/org/elasticsearch/exponentialhistogram/ExponentialScaleUtilsTests.java index 34d20d099532f..5e4ddaef65246 100644 --- a/libs/exponential-histogram/src/test/java/org/elasticsearch/exponentialhistogram/ExponentialScaleUtilsTests.java +++ b/libs/exponential-histogram/src/test/java/org/elasticsearch/exponentialhistogram/ExponentialScaleUtilsTests.java @@ -15,6 +15,7 @@ import java.math.BigDecimal; import java.math.MathContext; +import java.math.RoundingMode; import java.util.Random; import static org.elasticsearch.exponentialhistogram.ExponentialHistogram.MAX_INDEX; @@ -182,15 +183,16 @@ public void testUpscalingAccuracy() { public void testScaleUpTableUpToDate() { - MathContext mc = new MathContext(200); + MathContext mc = new MathContext(1000); BigDecimal one = new BigDecimal(1, mc); BigDecimal two = new BigDecimal(2, mc); for (int scale = MIN_SCALE; scale <= MAX_SCALE; scale++) { BigDecimal base = BigDecimalMath.pow(two, two.pow(-scale, mc), mc); BigDecimal factor = one.add(two.pow(scale, mc).multiply(one.subtract(BigDecimalMath.log2(one.add(base), mc)))); - assertThat(SCALE_UP_CONSTANT_TABLE[scale - MIN_SCALE], equalTo(factor.doubleValue())); + BigDecimal scaledFactor = factor.multiply(two.pow(63, mc)).setScale(0, RoundingMode.FLOOR); + assertThat(SCALE_UP_CONSTANT_TABLE[scale - MIN_SCALE], equalTo(scaledFactor.longValue())); } } From 
5e1ca08bc49440bd9c7a12d4b32f7521e40a0b5e Mon Sep 17 00:00:00 2001 From: Jonas Kunz Date: Thu, 17 Jul 2025 09:43:45 +0200 Subject: [PATCH 23/32] Readme fixes and clarifications --- libs/exponential-histogram/README.md | 71 ++++++++++++++++++++-------- 1 file changed, 51 insertions(+), 20 deletions(-) diff --git a/libs/exponential-histogram/README.md b/libs/exponential-histogram/README.md index d7c37467bd4aa..9ea66fd7cb8c4 100644 --- a/libs/exponential-histogram/README.md +++ b/libs/exponential-histogram/README.md @@ -2,25 +2,21 @@ This library provides an implementation of merging and analysis algorithms for e ## Overview -The library implements a sparse storage approach where only populated buckets consume memory and count towards the bucket limit. This differs from the OpenTelemetry implementation, which uses dense storage. While dense storage allows for O(1) time insertion of individual values, our sparse representation requires O(log m) time where m is the bucket capacity. However, the sparse representation enables more efficient storage and provides a simple merging algorithm with runtime linear in the number of populated buckets. In addition, this library also provides an array-backed sparse storage, ensuring cache efficiency. +The library implements a sparse storage approach where only populated buckets consume memory and count towards the bucket limit. This differs from the OpenTelemetry implementation, which uses dense storage. While dense storage allows for O(1) time insertion of individual values, our sparse representation requires O(log m) time where m is the bucket capacity. However, the sparse representation enables more efficient storage and provides a simple merging algorithm with runtime linear in the number of populated buckets. Additionally, this library also provides an array-backed sparse storage, ensuring cache efficiency. 
-The sparse storage approach offers significant advantages for [distributions with fewer distinct values](#distributions-with-few-distinct-values) than the bucket count, allowing the library to achieve near-exact representation of such distributions. This makes it suitable not only for exponential histograms but also as a universal solution for handling explicit bucket histograms. +The sparse storage approach offers significant advantages for distributions with fewer distinct values than the bucket count, allowing the library to achieve representation of such distributions with an error so small that it won't be noticed in practice. This makes it suitable not only for exponential histograms but also as a universal solution for handling explicit bucket histograms. ## Merging Algorithm -The merging algorithm works similarly to the merge-step of merge sort. -We simultaneously walk through the buckets of both histograms in order, merging them on the fly as needed. -If the total number of buckets in the end would exceed the bucket limit, we scale down as needed. +The merging algorithm works similarly to the merge-step of merge sort. We simultaneously walk through the buckets of both histograms in order, merging them on the fly as needed. If the total number of buckets in the end would exceed the bucket limit, we scale down as needed. Before we merge the buckets, we need to take care of the special zero-bucket and bring both histograms to the same scale. For the zero-bucket, we merge the zero threshold from both histograms and collapse any overlapping buckets into the resulting new zero bucket. -In order to bring both histograms to the same scale, we can make adjustments in both directions: -We can increase or decrease the scale of histograms as needed. +In order to bring both histograms to the same scale, we can make adjustments in both directions: we can increase or decrease the scale of histograms as needed. 
-See the [upscaling section](#upscaling) for details on how the upscaling works. -Upscaling helps prevent the precision of the result histogram merged from many histograms from being dragged down to the lowest scale of a potentially misconfigured input histogram. For example, if a histogram is recorded with a too low zero threshold, this can result in a degraded scale when using dense histogram storage, even if the histogram only contains two points. +See the [upscaling section](#upscaling) for details on how the upscaling works. Upscaling helps prevent the precision of the result histogram merged from many histograms from being dragged down to the lowest scale of a potentially misconfigured input histogram. For example, if a histogram is recorded with a too low zero threshold, this can result in a degraded scale when using dense histogram storage, even if the histogram only contains two points. ### Upscaling @@ -34,28 +30,63 @@ Where `l` is the lower bucket boundary and `u` is the upper bucket boundary. This assumption allows us to increase the scale of histograms without increasing the bucket count. Buckets are simply mapped to the ones in the new scale containing the point of least relative error of the original buckets. -This can introduce a small error, as the original center might be moved slightly. Therefore, we ensure that the upscaling happens at most once to prevent errors from adding up. -The higher the amount of upscaling, the less the error (higher scale means smaller buckets, which in turn means we get a better fit around the original point of least relative error). +This can introduce a small error, as the original center might be moved slightly. Therefore, we ensure that the upscaling happens at most once to prevent errors from adding up. The higher the amount of upscaling, the less the error (higher scale means smaller buckets, which in turn means we get a better fit around the original point of least relative error). 
-## Distributions with few distinct values +## Distributions with Few Distinct Values The sparse storage model only requires memory linear to the total number of buckets, while dense storage needs to store the entire range of the smallest and biggest buckets. This offers significant benefits for distributions with fewer distinct values: -If we have at least as many buckets as we have distinct values to store in the histogram, we can almost exactly represent this distribution. -This can be achieved by simply maintaining the scale at the maximum supported value (so the buckets become the smallest). -At the time of writing, the maximum scale is 38, so the relative distance between the lower and upper bucket boundaries is (2^2(-38)). +If we have at least as many buckets as we have distinct values to store in the histogram, we can represent this distribution with a much smaller error than the dense representation. +This can be achieved by maintaining the scale at the maximum supported value (so the buckets become the smallest). +At the time of writing, the maximum scale is 38, so the relative distance between the lower and upper bucket boundaries is (2^2^(-38)). -This is best explained with a concrete example: -If we store, for example, a duration value of 10^15 nano seconds (= roughly 11.5 days), this value will be stored in a bucket that guarantees a relative error of at most 2^2(-38), so 2.5 microseconds in this case. +The impact of the error is best shown with a concrete example: +If we store, for example, a duration value of 10^15 nanoseconds (= roughly 11.5 days), this value will be stored in a bucket that guarantees a relative error of at most 2^2^(-38), so roughly 2.5 microseconds in this case. As long as the number of values we insert is lower than the bucket count, we are guaranteed that no down-scaling happens: In contrast to dense storage, the scale does not depend on the spread between the smallest and largest bucket index. 
+ +To clarify the difference between dense and sparse storage, let's assume that we have an empty histogram and the maximum scale is zero while the maximum bucket count is four. +The same logic applies to higher scales and bucket counts, but we use these values to get easier numbers for this example. +The scale of zero means that our bucket boundaries are `1, 2, 4, 8, 16, 32, 64, 128, 256, ...`. +We now want to insert the value `6` into the histogram. The dense storage works by storing an array for the bucket counts plus an initial offset. +This means that the first slot in the bucket counts array corresponds to the bucket with index `offset` and the last one to `offset + bucketCounts.length - 1`. +So if we add the value `6` to the histogram, it falls into the `(4,8]` bucket, which has the index `2`. + +So our dense histogram looks like this: +``` +offset = 2 +bucketCounts = [1, 0, 0, 0] // represent bucket counts for bucket index 2 to 5 +``` + +If we now insert the value `20` (`(16,32]`, bucket index 4), everything is still fine: +``` +offset = 2 +bucketCounts = [1, 0, 1, 0] // represent bucket counts for bucket index 2 to 5 +``` + +However, we run into trouble if we insert the value `100`, which corresponds to index 6: That index is outside of the bounds of our array. +We can't just increase the `offset`, because the first bucket in our array is populated too. +We have no option other than decreasing the scale of the histogram, to make sure that our values `6` and `100` fall in the range of four **consecutive** buckets due to the bucket count limit of the dense storage. + +In contrast, a sparse histogram has no trouble storing this data while keeping the scale of zero: +``` +bucketIndicesToCounts: { + "2" : 1, + "4" : 1, + "6" : 1 +} +``` + +Downscaling on the sparse representation only happens if either: + * The number of populated buckets would become bigger than our maximum bucket count.
We have to downscale to make neighboring, populated buckets combine to a single bucket until we are below our limit again. + * The highest or smallest indices require more bits to store than we allow. This does not happen in our implementation for normal inputs, because we allow up to 62 bits for index storage, which fits the entire numeric range of IEEE 754 double precision floats at our maximum scale. + ### Handling Explicit Bucket Histograms We can make use of this property to convert explicit bucket histograms (https://opentelemetry.io/docs/specs/otel/metrics/data-model/#histogram) to exponential ones by again assuming that all values in a bucket lie in a single point: - * For each explicit bucket, we take its point of least relative error and add it to the corresponding exponential histogram bucket with the corresponding count - * The open, upper, and lower buckets, including infinity, will need special treatment, but these are not useful for percentile estimates anyway + * For each explicit bucket, we take its point of least relative error and add it to the corresponding exponential histogram bucket with the corresponding count. + * The open, upper, and lower buckets, including infinity, will need special treatment, but these are not useful for percentile estimates anyway. This gives us a great solution for universally dealing with histograms: -When merging exponential histograms generated from explicit ones, the result is exact as long as the number of distinct buckets from the original explicit bucket histograms does not exceed the exponential histogram bucket count. As a result, the computed percentiles will be exact with only the error of the original conversion. +When merging exponential histograms generated from explicit ones, the scale is not decreased (and therefore the error not increased) as long as the number of distinct buckets from the original explicit bucket histograms does not exceed the exponential histogram bucket count. 
As a result, the computed percentiles will be precise with only the [relative error of the initial conversion](#distributions-with-few-distinct-values). In addition, this allows us to compute percentiles on mixed explicit bucket histograms or even mix them with exponential ones by just using the exponential histogram algorithms. From c0917580181f6728301997c2f3ec150e5c7c3129 Mon Sep 17 00:00:00 2001 From: Jonas Kunz Date: Thu, 17 Jul 2025 11:08:46 +0200 Subject: [PATCH 24/32] Review fixes --- libs/exponential-histogram/README.md | 11 ++++ .../exponentialhistogram/DownscaleStats.java | 28 +++++++-- .../ExponentialHistogram.java | 7 ++- .../ExponentialHistogramGenerator.java | 6 +- .../ExponentialHistogramMerger.java | 39 +++++++------ .../ExponentialScaleUtils.java | 57 ++++++++++++++++--- .../exponentialhistogram/ZeroBucket.java | 14 ++--- .../DownscaleStatsTests.java | 14 ++--- .../ExponentialHistogramMergerTests.java | 2 +- .../ExponentialScaleUtilsTests.java | 27 +++++---- 10 files changed, 142 insertions(+), 63 deletions(-) diff --git a/libs/exponential-histogram/README.md b/libs/exponential-histogram/README.md index 9ea66fd7cb8c4..1331428304f22 100644 --- a/libs/exponential-histogram/README.md +++ b/libs/exponential-histogram/README.md @@ -2,6 +2,17 @@ This library provides an implementation of merging and analysis algorithms for e ## Overview +The library implements base-2 exponential histograms with perfect subsetting. The most important properties are: + +* The histogram has a scale parameter, which defines the accuracy. A higher scale implies a higher accuracy. +* The `base` for the buckets is defined as `base = 2^(2^-scale)`.
+* The histogram bucket at index `i` has the range `(base^i, base^(i+1)]` +* Negative values are represented by a separate negative range of buckets with the boundaries `(-base^(i+1), -base^i]` +* Histograms are perfectly subsetting: decreasing the scale by one merges each pair of neighboring buckets +* A special zero bucket with a zero-threshold is used to handle zero and close-to-zero values + +For more details, please refer to the [OpenTelemetry definition](https://opentelemetry.io/docs/specs/otel/metrics/data-model/#exponentialhistogram). + The library implements a sparse storage approach where only populated buckets consume memory and count towards the bucket limit. This differs from the OpenTelemetry implementation, which uses dense storage. While dense storage allows for O(1) time insertion of individual values, our sparse representation requires O(log m) time where m is the bucket capacity. However, the sparse representation enables more efficient storage and provides a simple merging algorithm with runtime linear in the number of populated buckets. Additionally, this library also provides an array-backed sparse storage, ensuring cache efficiency. The sparse storage approach offers significant advantages for distributions with fewer distinct values than the bucket count, allowing the library to achieve representation of such distributions with an error so small that it won't be noticed in practice. This makes it suitable not only for exponential histograms but also as a universal solution for handling explicit bucket histograms.
diff --git a/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/DownscaleStats.java b/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/DownscaleStats.java index cd30f705efc74..8578f11b3e3db 100644 --- a/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/DownscaleStats.java +++ b/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/DownscaleStats.java @@ -11,6 +11,10 @@ import java.util.Arrays; +import static org.elasticsearch.exponentialhistogram.ExponentialHistogram.MAX_INDEX; +import static org.elasticsearch.exponentialhistogram.ExponentialHistogram.MAX_INDEX_BITS; +import static org.elasticsearch.exponentialhistogram.ExponentialHistogram.MIN_INDEX; + /** * A data structure for efficiently computing the required scale reduction for a histogram to reach a target number of buckets. * This works by examining pairs of neighboring buckets and determining at which scale reduction they would merge into a single bucket. @@ -19,7 +23,7 @@ class DownscaleStats { // collapsedBucketCount[i] stores the number of additional // collapsed buckets when increasing the scale by (i+1) instead of just by (i) - int[] collapsedBucketCount = new int[63]; + int[] collapsedBucketCount = new int[MAX_INDEX_BITS]; /** * Resets the data structure to its initial state. @@ -32,6 +36,12 @@ void add(long previousBucketIndex, long currentBucketIndex) { if (currentBucketIndex <= previousBucketIndex) { throw new IllegalArgumentException("currentBucketIndex must be greater than previousBucketIndex"); } + if (currentBucketIndex < MIN_INDEX || currentBucketIndex > MAX_INDEX) { + throw new IllegalArgumentException("currentBucketIndex must be in the range [" + MIN_INDEX + "..." + MAX_INDEX + "]"); + } + if (previousBucketIndex < MIN_INDEX || previousBucketIndex > MAX_INDEX) { + throw new IllegalArgumentException("previousBucketIndex must be in the range [" + MIN_INDEX + "..." 
+ MAX_INDEX + "]"); + } /* * Below is an efficient variant of the following algorithm: * for (int i=0; i<63; i++) { @@ -44,12 +54,12 @@ void add(long previousBucketIndex, long currentBucketIndex) { */ long bitXor = previousBucketIndex ^ currentBucketIndex; int numEqualLeadingBits = Long.numberOfLeadingZeros(bitXor); - if (numEqualLeadingBits == 0) { - // right-shifting will never make the buckets combine, because one is positive and the other is negative - return; + // if there are zero equal leading bits, the indices have a different sign. + // Therefore right-shifting will never make the buckets combine + if (numEqualLeadingBits > 0) { + int requiredScaleChange = 64 - numEqualLeadingBits; + collapsedBucketCount[requiredScaleChange - 1]++; } - int requiredScaleChange = 64 - numEqualLeadingBits; - collapsedBucketCount[requiredScaleChange - 1]++; } /** @@ -59,6 +69,9 @@ void add(long previousBucketIndex, long currentBucketIndex) { * @return the number of buckets that will be merged */ int getCollapsedBucketCountAfterScaleReduction(int reduction) { + if (reduction < 0 || reduction > MAX_INDEX_BITS) { + throw new IllegalArgumentException("reduction must be between 0 and " + (MAX_INDEX_BITS)); + } int totalCollapsed = 0; for (int i = 0; i < reduction; i++) { totalCollapsed += collapsedBucketCount[i]; @@ -73,6 +86,9 @@ int getCollapsedBucketCountAfterScaleReduction(int reduction) { * @return the required scale reduction */ int getRequiredScaleReductionToReduceBucketCountBy(int desiredCollapsedBucketCount) { + if (desiredCollapsedBucketCount < 0) { + throw new IllegalArgumentException("desiredCollapsedBucketCount must be greater than or equal to 0"); + } if (desiredCollapsedBucketCount == 0) { return 0; } diff --git a/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ExponentialHistogram.java b/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ExponentialHistogram.java index 78ff1bfa06b42..7ceb76158fb22 100644 
--- a/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ExponentialHistogram.java +++ b/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ExponentialHistogram.java @@ -17,8 +17,8 @@ * This interface supports sparse implementations, allowing iteration over buckets without requiring direct index access.
* The most important properties are: *
    - *
  • The histogram has a scale parameter, which defines the accuracy. - * The {@code base} for the buckets is defined as {@code base = 2^(2^-scale)}
  • + *
  • The histogram has a scale parameter, which defines the accuracy. A higher scale implies a higher accuracy. + * The {@code base} for the buckets is defined as {@code base = 2^(2^-scale)}.
  • *
  • The histogram bucket at index {@code i} has the range {@code (base^i, base^(i+1)]}
  • *
  • Negative values are represented by a separate negative range of buckets with the boundaries {@code (-base^(i+1), -base^i]}
  • *
  • Histograms are perfectly subsetting: increasing the scale by one merges each pair of neighboring buckets
  • @@ -44,7 +44,8 @@ public interface ExponentialHistogram { // At this scale, all double values fall into a single bucket. int MIN_SCALE = -11; - // Only use 62 bits at max to allow computing the difference between the smallest and largest index without causing an overflow. + // Only use 62 bits (plus the sign bit) at max to allow computing the difference between the smallest and largest index without causing + // an overflow. // The extra bit also provides room for compact storage tricks. int MAX_INDEX_BITS = 62; long MAX_INDEX = (1L << MAX_INDEX_BITS) - 1; diff --git a/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ExponentialHistogramGenerator.java b/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ExponentialHistogramGenerator.java index eeb66d6f96920..ccb5d1fe7c034 100644 --- a/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ExponentialHistogramGenerator.java +++ b/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ExponentialHistogramGenerator.java @@ -83,7 +83,11 @@ public ExponentialHistogram get() { * @return a new {@link ExponentialHistogram} */ public static ExponentialHistogram createFor(double... 
values) { - return createFor(values.length, Arrays.stream(values)); + ExponentialHistogramGenerator generator = new ExponentialHistogramGenerator(values.length); + for (double val : values) { + generator.add(val); + } + return generator.get(); } /** diff --git a/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ExponentialHistogramMerger.java b/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ExponentialHistogramMerger.java index 352f5ae51494d..cfc90a6dc1414 100644 --- a/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ExponentialHistogramMerger.java +++ b/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ExponentialHistogramMerger.java @@ -38,12 +38,16 @@ public ExponentialHistogramMerger(int bucketLimit) { } // Only intended for testing, using this in production means an unnecessary reduction of precision - ExponentialHistogramMerger(int resultBucketCount, int minScale) { - this(resultBucketCount); + private ExponentialHistogramMerger(int bucketLimit, int minScale) { + this(bucketLimit); result.resetBuckets(minScale); buffer.resetBuckets(minScale); } + static ExponentialHistogramMerger createForTesting(int bucketLimit, int minScale) { + return new ExponentialHistogramMerger(bucketLimit, minScale); + } + /** * Merges the given histogram into the current result. * Must not be called after {@link #get()} has been called. @@ -54,22 +58,15 @@ public void add(ExponentialHistogram toAdd) { if (isFinished) { throw new IllegalStateException("get() has already been called"); } - merge(buffer, result, toAdd); - FixedCapacityExponentialHistogram temp = result; - result = buffer; - buffer = temp; + doMerge(toAdd); } /** * Returns the merged histogram. - * Must not be called multiple times. 
* * @return the merged histogram */ public ExponentialHistogram get() { - if (isFinished) { - throw new IllegalStateException("get() has already been called"); - } isFinished = true; return result; } @@ -80,7 +77,10 @@ public ExponentialHistogram get() { // then in O(log(b)) turn them into a single, merged histogram // (b is the number of buffered buckets) - private void merge(FixedCapacityExponentialHistogram output, ExponentialHistogram a, ExponentialHistogram b) { + private void doMerge(ExponentialHistogram b) { + + ExponentialHistogram a = result; + ExponentialHistogram.CopyableBucketIterator posBucketsA = a.positiveBuckets(); ExponentialHistogram.CopyableBucketIterator negBucketsA = a.negativeBuckets(); ExponentialHistogram.CopyableBucketIterator posBucketsB = b.positiveBuckets(); @@ -89,7 +89,7 @@ private void merge(FixedCapacityExponentialHistogram output, ExponentialHistogra ZeroBucket zeroBucket = a.zeroBucket().merge(b.zeroBucket()); zeroBucket = zeroBucket.collapseOverlappingBuckets(posBucketsA, negBucketsA, posBucketsB, negBucketsB); - output.setZeroBucket(zeroBucket); + buffer.setZeroBucket(zeroBucket); // We attempt to bring everything to the scale of A. // This might involve increasing the scale for B, which would increase its indices. 
@@ -117,25 +117,28 @@ private void merge(FixedCapacityExponentialHistogram output, ExponentialHistogra MergingBucketIterator positiveMerged = new MergingBucketIterator(posBucketsA.copy(), posBucketsB.copy(), targetScale); MergingBucketIterator negativeMerged = new MergingBucketIterator(negBucketsA.copy(), negBucketsB.copy(), targetScale); - output.resetBuckets(targetScale); + buffer.resetBuckets(targetScale); downscaleStats.reset(); - int overflowCount = putBuckets(output, negativeMerged, false, downscaleStats); - overflowCount += putBuckets(output, positiveMerged, true, downscaleStats); + int overflowCount = putBuckets(buffer, negativeMerged, false, downscaleStats); + overflowCount += putBuckets(buffer, positiveMerged, true, downscaleStats); if (overflowCount > 0) { // UDD-sketch approach: decrease the scale and retry. int reduction = downscaleStats.getRequiredScaleReductionToReduceBucketCountBy(overflowCount); targetScale -= reduction; - output.resetBuckets(targetScale); + buffer.resetBuckets(targetScale); positiveMerged = new MergingBucketIterator(posBucketsA, posBucketsB, targetScale); negativeMerged = new MergingBucketIterator(negBucketsA, negBucketsB, targetScale); - overflowCount = putBuckets(output, negativeMerged, false, null); - overflowCount += putBuckets(output, positiveMerged, true, null); + overflowCount = putBuckets(buffer, negativeMerged, false, null); + overflowCount += putBuckets(buffer, positiveMerged, true, null); if (overflowCount > 0) { throw new IllegalStateException("Should never happen, the histogram should have had enough space"); } } + FixedCapacityExponentialHistogram temp = result; + result = buffer; + buffer = temp; } private static int putBuckets( diff --git a/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ExponentialScaleUtils.java b/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ExponentialScaleUtils.java index 9cc10824b6adb..27429c9af474d 100644 --- 
a/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ExponentialScaleUtils.java +++ b/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ExponentialScaleUtils.java @@ -11,6 +11,7 @@ import static org.elasticsearch.exponentialhistogram.ExponentialHistogram.MAX_INDEX; import static org.elasticsearch.exponentialhistogram.ExponentialHistogram.MAX_INDEX_BITS; +import static org.elasticsearch.exponentialhistogram.ExponentialHistogram.MAX_SCALE; import static org.elasticsearch.exponentialhistogram.ExponentialHistogram.MIN_INDEX; import static org.elasticsearch.exponentialhistogram.ExponentialHistogram.MIN_SCALE; @@ -92,6 +93,13 @@ public class ExponentialScaleUtils { * @return the index of the bucket in the new scale */ static long adjustScale(long index, int currentScale, int scaleAdjustment) { + checkIndexAndScaleBounds(index, currentScale); + + int newScale = currentScale + scaleAdjustment; + if (newScale < MIN_SCALE || newScale > MAX_SCALE) { + throw new IllegalArgumentException("adjusted scale must be in the range [" + MIN_SCALE + "..." + MAX_SCALE + "]"); + } + if (scaleAdjustment <= 0) { return index >> -scaleAdjustment; } else { @@ -116,6 +124,7 @@ static long adjustScale(long index, int currentScale, int scaleAdjustment) { /** * Compares the lower boundaries of two buckets, which may have different scales. * This is equivalent to a mathematically correct comparison of the lower bucket boundaries. + * Note that this method allows for scales and indices of the full numeric range of the types. 
* * @param idxA the index of the first bucket * @param scaleA the scale of the first bucket @@ -123,13 +132,13 @@ static long adjustScale(long index, int currentScale, int scaleAdjustment) { * @param scaleB the scale of the second bucket * @return a negative integer, zero, or a positive integer as the first bucket's lower boundary is less than, equal to, or greater than the second bucket's lower boundary */ - public static int compareLowerBoundaries(long idxA, int scaleA, long idxB, int scaleB) { + public static int compareExponentiallyScaledValues(long idxA, int scaleA, long idxB, int scaleB) { if (scaleA > scaleB) { - return -compareLowerBoundaries(idxB, scaleB, idxA, scaleA); + return -compareExponentiallyScaledValues(idxB, scaleB, idxA, scaleA); } // scaleA <= scaleB int shifts = scaleB - scaleA; - int maxScaleAdjustment = getMaximumScaleIncrease(idxA); + int maxScaleAdjustment = getMaximumScaleIncreaseIgnoringIndexLimits(idxA); if (maxScaleAdjustment < shifts) { // We would overflow if we adjusted A to the scale of B. // If A is negative, scaling would produce a number less than Long.MIN_VALUE, so it is smaller than B. @@ -154,9 +163,11 @@ public static int compareLowerBoundaries(long idxA, int scaleA, long idxB, int s * @return the maximum permissible scale increase */ public static int getMaximumScaleIncrease(long index) { - if (index < MIN_INDEX || index > MAX_INDEX) { - throw new IllegalArgumentException("index must be in range [" + MIN_INDEX + ".." 
+ MAX_INDEX + "]"); - } + checkIndexBounds(index); + return getMaximumScaleIncreaseIgnoringIndexLimits(index); + } + + private static int getMaximumScaleIncreaseIgnoringIndexLimits(long index) { if (index < 0) { index = ~index; } @@ -171,17 +182,27 @@ public static int getMaximumScaleIncrease(long index) { * @return the upper boundary of the bucket */ public static double getUpperBucketBoundary(long index, int scale) { - return getLowerBucketBoundary(index + 1, scale); + checkIndexAndScaleBounds(index, scale); + return exponentiallyScaledToDoubleValue(index + 1, scale); } /** * Returns the lower boundary of the bucket with the given index and scale. * - * @param index the index of the bucket + * @param index the index of the bucket in the [{@link ExponentialHistogram#MIN_INDEX}, {@link ExponentialHistogram#MAX_INDEX}] range. * @param scale the scale of the bucket * @return the lower boundary of the bucket */ public static double getLowerBucketBoundary(long index, int scale) { + checkIndexAndScaleBounds(index, scale); + return exponentiallyScaledToDoubleValue(index, scale); + } + + /** + * Computes (2^2^(-scale))^index, + * allowing also indices outside of the [{@link ExponentialHistogram#MIN_INDEX}, {@link ExponentialHistogram#MAX_INDEX}] range. 
+ */ + static double exponentiallyScaledToDoubleValue(long index, int scale) { double inverseFactor = Math.scalb(LN_2, -scale); return Math.exp(inverseFactor * index); } @@ -198,6 +219,7 @@ public static double getLowerBucketBoundary(long index, int scale) { * @return the point of least relative error */ public static double getPointOfLeastRelativeError(long bucketIndex, int scale) { + checkIndexAndScaleBounds(bucketIndex, scale); double upperBound = getUpperBucketBoundary(bucketIndex, scale); double histogramBase = Math.pow(2, Math.scalb(1, -scale)); return 2 / (histogramBase + 1) * upperBound; @@ -211,9 +233,27 @@ public static double getPointOfLeastRelativeError(long bucketIndex, int scale) { * @return the index of the bucket */ public static long computeIndex(double value, int scale) { + checkScaleBounds(scale); return Indexing.computeIndex(value, scale); } + private static void checkIndexAndScaleBounds(long index, int scale) { + checkIndexBounds(index); + checkScaleBounds(scale); + } + + private static void checkScaleBounds(int scale) { + if (scale < MIN_SCALE || scale > MAX_SCALE) { + throw new IllegalArgumentException("scale must be in range [" + MIN_SCALE + ".." + MAX_SCALE + "]"); + } + } + + private static void checkIndexBounds(long index) { + if (index < MIN_INDEX || index > MAX_INDEX) { + throw new IllegalArgumentException("index must be in range [" + MIN_INDEX + ".." 
+ MAX_INDEX + "]"); + } + } + /** * The code in this class was copied and slightly adapted from the * OpenTelemetry Base2ExponentialHistogramIndexer implementation, @@ -265,7 +305,6 @@ static long computeIndex(double value, int scale) { * Scales: Use the Logarithm Function */ private static long getIndexByLogarithm(double value, int scale) { - double scaleFactor = Math.scalb(LOG_BASE2_E, scale); return (long) Math.ceil(Math.scalb(Math.log(value) * LOG_BASE2_E, scale)) - 1; } diff --git a/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ZeroBucket.java b/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ZeroBucket.java index d1e8592240224..b7df8f3673cd2 100644 --- a/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ZeroBucket.java +++ b/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ZeroBucket.java @@ -12,9 +12,9 @@ import static org.elasticsearch.exponentialhistogram.ExponentialHistogram.MAX_SCALE; import static org.elasticsearch.exponentialhistogram.ExponentialHistogram.MIN_INDEX; import static org.elasticsearch.exponentialhistogram.ExponentialHistogram.MIN_SCALE; -import static org.elasticsearch.exponentialhistogram.ExponentialScaleUtils.compareLowerBoundaries; +import static org.elasticsearch.exponentialhistogram.ExponentialScaleUtils.compareExponentiallyScaledValues; import static org.elasticsearch.exponentialhistogram.ExponentialScaleUtils.computeIndex; -import static org.elasticsearch.exponentialhistogram.ExponentialScaleUtils.getLowerBucketBoundary; +import static org.elasticsearch.exponentialhistogram.ExponentialScaleUtils.exponentiallyScaledToDoubleValue; /** * Represents the bucket for values around zero in an exponential histogram. 
@@ -110,14 +110,14 @@ public ZeroBucket collapseOverlappingBuckets(ExponentialHistogram.BucketIterator * @return A negative integer, zero, or a positive integer if this bucket's threshold is less than, equal to, or greater than the other's. */ public int compareZeroThreshold(ZeroBucket other) { - return compareLowerBoundaries(index, scale, other.index, other.scale); + return compareExponentiallyScaledValues(index, scale, other.index, other.scale); } /** * @return The value of the zero threshold. */ public double zeroThreshold() { - return getLowerBucketBoundary(index, scale); + return exponentiallyScaledToDoubleValue(index, scale); } /** @@ -131,7 +131,7 @@ public ZeroBucket collapseOverlappingBuckets(ExponentialHistogram.BucketIterator long collapsedCount = 0; long highestCollapsedIndex = 0; - while (buckets.hasNext() && compareLowerBoundaries(buckets.peekIndex(), buckets.scale(), index, scale) < 0) { + while (buckets.hasNext() && compareExponentiallyScaledValues(buckets.peekIndex(), buckets.scale(), index, scale) < 0) { highestCollapsedIndex = buckets.peekIndex(); collapsedCount += buckets.peekCount(); buckets.advance(); @@ -141,8 +141,8 @@ public ZeroBucket collapseOverlappingBuckets(ExponentialHistogram.BucketIterator } else { long newZeroCount = count + collapsedCount; // +1 because we need to adjust the zero threshold to the upper boundary of the collapsed bucket - long collapsedUpperBoundIndex = Math.addExact(highestCollapsedIndex, 1); - if (compareLowerBoundaries(index, scale, collapsedUpperBoundIndex, buckets.scale()) >= 0) { + long collapsedUpperBoundIndex = highestCollapsedIndex + 1; + if (compareExponentiallyScaledValues(index, scale, collapsedUpperBoundIndex, buckets.scale()) >= 0) { // Our current zero-threshold is larger than the upper boundary of the largest collapsed bucket, so we keep it. 
return new ZeroBucket(index, scale, newZeroCount); } else { diff --git a/libs/exponential-histogram/src/test/java/org/elasticsearch/exponentialhistogram/DownscaleStatsTests.java b/libs/exponential-histogram/src/test/java/org/elasticsearch/exponentialhistogram/DownscaleStatsTests.java index df6a1773202c8..d6428a198b2fa 100644 --- a/libs/exponential-histogram/src/test/java/org/elasticsearch/exponentialhistogram/DownscaleStatsTests.java +++ b/libs/exponential-histogram/src/test/java/org/elasticsearch/exponentialhistogram/DownscaleStatsTests.java @@ -15,29 +15,29 @@ import java.util.Collection; import java.util.HashSet; import java.util.List; -import java.util.Random; import java.util.stream.IntStream; import java.util.stream.LongStream; +import static org.elasticsearch.exponentialhistogram.ExponentialHistogram.MAX_INDEX; +import static org.elasticsearch.exponentialhistogram.ExponentialHistogram.MAX_INDEX_BITS; +import static org.elasticsearch.exponentialhistogram.ExponentialHistogram.MIN_INDEX; import static org.hamcrest.MatcherAssert.assertThat; import static org.hamcrest.Matchers.equalTo; public class DownscaleStatsTests extends ESTestCase { public void testExponential() { - long[] values = IntStream.range(0, 100).mapToLong(i -> (long) Math.min(Integer.MAX_VALUE, Math.pow(1.1, i))).distinct().toArray(); + long[] values = IntStream.range(0, 100).mapToLong(i -> (long) Math.min(MAX_INDEX, Math.pow(1.1, i))).distinct().toArray(); verifyFor(values); } public void testNumericalLimits() { - verifyFor(Long.MIN_VALUE, Long.MAX_VALUE); + verifyFor(MIN_INDEX, MAX_INDEX); } public void testRandom() { - Random rnd = new Random(42); - for (int i = 0; i < 100; i++) { - List values = IntStream.range(0, 1000).mapToObj(j -> rnd.nextLong()).distinct().toList(); + List values = IntStream.range(0, 1000).mapToObj(j -> random().nextLong(MIN_INDEX, MAX_INDEX + 1)).distinct().toList(); verifyFor(values); } } @@ -60,7 +60,7 @@ void verifyFor(Collection indices) { stats.add(prev, curr); } - 
for (int i = 0; i < 64; i++) { + for (int i = 0; i <= MAX_INDEX_BITS; i++) { int scaleReduction = i; long remainingCount = indices.stream().mapToLong(Long::longValue).map(index -> index >> scaleReduction).distinct().count(); long reduction = sorted.size() - remainingCount; diff --git a/libs/exponential-histogram/src/test/java/org/elasticsearch/exponentialhistogram/ExponentialHistogramMergerTests.java b/libs/exponential-histogram/src/test/java/org/elasticsearch/exponentialhistogram/ExponentialHistogramMergerTests.java index 19f5134f4cc34..75d4a974fcc7a 100644 --- a/libs/exponential-histogram/src/test/java/org/elasticsearch/exponentialhistogram/ExponentialHistogramMergerTests.java +++ b/libs/exponential-histogram/src/test/java/org/elasticsearch/exponentialhistogram/ExponentialHistogramMergerTests.java @@ -154,7 +154,7 @@ private void assertBucketsEqual(ExponentialHistogram.BucketIterator itA, Exponen } private static ExponentialHistogram mergeWithMinimumScale(int bucketCount, int scale, ExponentialHistogram... 
histograms) { - ExponentialHistogramMerger merger = new ExponentialHistogramMerger(bucketCount, scale); + ExponentialHistogramMerger merger = ExponentialHistogramMerger.createForTesting(bucketCount, scale); Arrays.stream(histograms).forEach(merger::add); return merger.get(); } diff --git a/libs/exponential-histogram/src/test/java/org/elasticsearch/exponentialhistogram/ExponentialScaleUtilsTests.java b/libs/exponential-histogram/src/test/java/org/elasticsearch/exponentialhistogram/ExponentialScaleUtilsTests.java index 5e4ddaef65246..ade2bbd56c71f 100644 --- a/libs/exponential-histogram/src/test/java/org/elasticsearch/exponentialhistogram/ExponentialScaleUtilsTests.java +++ b/libs/exponential-histogram/src/test/java/org/elasticsearch/exponentialhistogram/ExponentialScaleUtilsTests.java @@ -25,7 +25,7 @@ import static org.elasticsearch.exponentialhistogram.ExponentialHistogram.MIN_SCALE; import static org.elasticsearch.exponentialhistogram.ExponentialScaleUtils.SCALE_UP_CONSTANT_TABLE; import static org.elasticsearch.exponentialhistogram.ExponentialScaleUtils.adjustScale; -import static org.elasticsearch.exponentialhistogram.ExponentialScaleUtils.compareLowerBoundaries; +import static org.elasticsearch.exponentialhistogram.ExponentialScaleUtils.compareExponentiallyScaledValues; import static org.elasticsearch.exponentialhistogram.ExponentialScaleUtils.computeIndex; import static org.elasticsearch.exponentialhistogram.ExponentialScaleUtils.getLowerBucketBoundary; import static org.elasticsearch.exponentialhistogram.ExponentialScaleUtils.getMaximumScaleIncrease; @@ -109,17 +109,19 @@ public void testRandomIndicesScaleAdjustement() { for (int i = 0; i < 100_000; i++) { long index = rnd.nextLong(MAX_INDEX); int currentScale = rnd.nextInt(MIN_SCALE, MAX_SCALE); - int maxAdjustment = getMaximumScaleIncrease(index); + int maxAdjustment = Math.min(MAX_SCALE - currentScale, getMaximumScaleIncrease(index)); assertThat( adjustScale(adjustScale(index, currentScale, 
maxAdjustment), currentScale + maxAdjustment, -maxAdjustment), equalTo(index) ); - if (index > 0) { - assertThat(adjustScale(index, currentScale, maxAdjustment) * 2, greaterThan(MAX_INDEX)); - } else { - assertThat(adjustScale(index, currentScale, maxAdjustment) * 2, lessThan(MIN_INDEX)); + if (currentScale + maxAdjustment < MAX_SCALE) { + if (index > 0) { + assertThat(adjustScale(index, currentScale, maxAdjustment) * 2, greaterThan(MAX_INDEX)); + } else if (index < 0) { + assertThat(adjustScale(index, currentScale, maxAdjustment) * 2, lessThan(MIN_INDEX)); + } } } @@ -129,10 +131,10 @@ public void testRandomBucketBoundaryComparison() { Random rnd = new Random(42); for (int i = 0; i < 100_000; i++) { - long indexA = rnd.nextLong() % MAX_INDEX; - long indexB = rnd.nextLong() % MAX_INDEX; - int scaleA = rnd.nextInt() % MAX_SCALE; - int scaleB = rnd.nextInt() % MAX_SCALE; + long indexA = rnd.nextLong(MIN_INDEX, MAX_INDEX + 1); + long indexB = rnd.nextLong(MIN_INDEX, MAX_INDEX + 1); + int scaleA = rnd.nextInt(MIN_SCALE, MAX_SCALE + 1); + int scaleB = rnd.nextInt(MIN_SCALE, MAX_SCALE + 1); double lowerBoundA = getLowerBucketBoundary(indexA, scaleA); while (Double.isInfinite(lowerBoundA)) { @@ -146,7 +148,10 @@ public void testRandomBucketBoundaryComparison() { } if (lowerBoundA != lowerBoundB) { - assertThat(Double.compare(lowerBoundA, lowerBoundB), equalTo(compareLowerBoundaries(indexA, scaleA, indexB, scaleB))); + assertThat( + Double.compare(lowerBoundA, lowerBoundB), + equalTo(compareExponentiallyScaledValues(indexA, scaleA, indexB, scaleB)) + ); } } } From 25be13d71f5d39c9c338648b12c7ff502c33c3a6 Mon Sep 17 00:00:00 2001 From: Jonas Kunz Date: Thu, 17 Jul 2025 12:19:05 +0200 Subject: [PATCH 25/32] Refactor bucket representation --- .../ExponentialHistogramMergeBench.java | 3 +- .../exponentialhistogram/BucketIterator.java | 59 ++++++++++ .../CopyableBucketIterator.java | 24 ++++ .../ExponentialHistogram.java | 76 +++--------- .../ExponentialHistogramMerger.java | 
28 +++-- .../ExponentialHistogramQuantile.java | 19 +-- .../FixedCapacityExponentialHistogram.java | 110 ++++++++++++------ .../MergingBucketIterator.java | 8 +- .../ScaleAdjustingBucketIterator.java | 6 +- .../exponentialhistogram/ZeroBucket.java | 6 +- .../ExponentialHistogramGeneratorTests.java | 2 +- .../ExponentialHistogramMergerTests.java | 20 ++-- 12 files changed, 216 insertions(+), 145 deletions(-) create mode 100644 libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/BucketIterator.java create mode 100644 libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/CopyableBucketIterator.java diff --git a/benchmarks/src/main/java/org/elasticsearch/benchmark/exponentialhistogram/ExponentialHistogramMergeBench.java b/benchmarks/src/main/java/org/elasticsearch/benchmark/exponentialhistogram/ExponentialHistogramMergeBench.java index b14501855a303..b82650d367e17 100644 --- a/benchmarks/src/main/java/org/elasticsearch/benchmark/exponentialhistogram/ExponentialHistogramMergeBench.java +++ b/benchmarks/src/main/java/org/elasticsearch/benchmark/exponentialhistogram/ExponentialHistogramMergeBench.java @@ -9,6 +9,7 @@ package org.elasticsearch.benchmark.exponentialhistogram; +import org.elasticsearch.exponentialhistogram.BucketIterator; import org.elasticsearch.exponentialhistogram.ExponentialHistogram; import org.elasticsearch.exponentialhistogram.ExponentialHistogramGenerator; import org.elasticsearch.exponentialhistogram.ExponentialHistogramMerger; @@ -86,7 +87,7 @@ public void setUp() { private static int getBucketCount(ExponentialHistogram histo) { int cnt = 0; - for (ExponentialHistogram.BucketIterator it : List.of(histo.negativeBuckets(), histo.positiveBuckets())) { + for (BucketIterator it : List.of(histo.negativeBuckets(), histo.positiveBuckets())) { while (it.hasNext()) { cnt++; it.advance(); diff --git a/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/BucketIterator.java 
b/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/BucketIterator.java new file mode 100644 index 0000000000000..1ea8df4b46322 --- /dev/null +++ b/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/BucketIterator.java @@ -0,0 +1,59 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the "Elastic License + * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side + * Public License v 1"; you may not use this file except in compliance with, at + * your election, the "Elastic License 2.0", the "GNU Affero General Public + * License v3.0 only", or the "Server Side Public License, v 1". + */ + +package org.elasticsearch.exponentialhistogram; + +/** + * An iterator over the non-empty buckets of the histogram for either the positive or negative range. + *
      + *
    • The iterator always iterates from the lowest bucket index to the highest.
    • + *
    • The iterator never returns duplicate buckets (buckets with the same index).
    • + *
    • The iterator never returns empty buckets ({@link #peekCount()} is never zero).
    • + *
    + */ +public interface BucketIterator { + /** + * Checks if there are any buckets remaining to be visited by this iterator. + * If the end has been reached, it is illegal to call {@link #peekCount()}, {@link #peekIndex()}, or {@link #advance()}. + * + * @return {@code true} if the iterator has more elements, {@code false} otherwise + */ + boolean hasNext(); + + /** + * The number of items in the bucket at the current iterator position. Does not advance the iterator. + * Must not be called if {@link #hasNext()} returns {@code false}. + * + * @return the number of items in the bucket, always greater than zero + */ + long peekCount(); + + /** + * The index of the bucket at the current iterator position. Does not advance the iterator. + * Must not be called if {@link #hasNext()} returns {@code false}. + * + * @return the index of the bucket, guaranteed to be in the range [{@link #MIN_INDEX}, {@link #MAX_INDEX}] + */ + long peekIndex(); + + /** + * Moves the iterator to the next, non-empty bucket. + * If {@link #hasNext()} is {@code true} after calling {@link #advance()}, {@link #peekIndex()} is guaranteed to return a value + * greater than the value returned prior to the {@link #advance()} call. + */ + void advance(); + + /** + * Provides the scale that can be used to convert indices returned by {@link #peekIndex()} to the bucket boundaries, + * e.g., via {@link ExponentialScaleUtils#getLowerBucketBoundary(long, int)}. 
+ * + * @return the scale, which is guaranteed to be constant over the lifetime of this iterator + */ + int scale(); +} diff --git a/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/CopyableBucketIterator.java b/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/CopyableBucketIterator.java new file mode 100644 index 0000000000000..196a44b25d861 --- /dev/null +++ b/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/CopyableBucketIterator.java @@ -0,0 +1,24 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the "Elastic License + * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side + * Public License v 1"; you may not use this file except in compliance with, at + * your election, the "Elastic License 2.0", the "GNU Affero General Public + * License v3.0 only", or the "Server Side Public License, v 1". + */ + +package org.elasticsearch.exponentialhistogram; + +/** + * A {@link BucketIterator} that can be copied. + */ +public interface CopyableBucketIterator extends BucketIterator { + + /** + * Creates a copy of this bucket iterator, pointing at the same bucket of the same range of buckets. + * Calling {@link #advance()} on the copied iterator does not affect this instance and vice-versa. 
+ * + * @return a copy of this iterator + */ + CopyableBucketIterator copy(); +} diff --git a/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ExponentialHistogram.java b/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ExponentialHistogram.java index 7ceb76158fb22..e53d544b53d9d 100644 --- a/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ExponentialHistogram.java +++ b/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ExponentialHistogram.java @@ -65,84 +65,38 @@ public interface ExponentialHistogram { ZeroBucket zeroBucket(); /** - * @return a {@link BucketIterator} for the populated, positive buckets of this histogram. - * The {@link BucketIterator#scale()} of the returned iterator must be the same as {@link #scale()}. + * @return a {@link Buckets} instance for the populated buckets covering the positive value range of this histogram. + * The {@link BucketIterator#scale()} of iterators obtained via {@link Buckets#iterator()} must be the same as {@link #scale()}. */ - CopyableBucketIterator positiveBuckets(); + Buckets positiveBuckets(); /** - * @return a {@link BucketIterator} for the populated, negative buckets of this histogram. - * The {@link BucketIterator#scale()} of the returned iterator must be the same as {@link #scale()}. + * @return a {@link Buckets} instance for the populated buckets covering the negative value range of this histogram. + * The {@link BucketIterator#scale()} of iterators obtained via {@link Buckets#iterator()} must be the same as {@link #scale()}. */ - CopyableBucketIterator negativeBuckets(); + Buckets negativeBuckets(); /** - * Returns the highest populated bucket index, taking both negative and positive buckets into account. 
- * - * @return the highest populated bucket index, or an empty optional if no buckets are populated + * Represents a bucket range of an {@link ExponentialHistogram}, either the positive or the negative range. */ - OptionalLong maximumBucketIndex(); + interface Buckets { - /** - * An iterator over the non-empty buckets of the histogram for either the positive or negative range. - *
-     * <ul>
-     *     <li>The iterator always iterates from the lowest bucket index to the highest.</li>
-     *     <li>The iterator never returns duplicate buckets (buckets with the same index).</li>
-     *     <li>The iterator never returns empty buckets ({@link #peekCount()} is never zero).</li>
-     * </ul>
    - */ - interface BucketIterator { /** - * Checks if there are any buckets remaining to be visited by this iterator. - * If the end has been reached, it is illegal to call {@link #peekCount()}, {@link #peekIndex()}, or {@link #advance()}. - * - * @return {@code true} if the iterator has more elements, {@code false} otherwise + * @return a {@link BucketIterator} for the populated buckets of this bucket range. + * The {@link BucketIterator#scale()} of the returned iterator must be the same as {@link #scale()}. */ - boolean hasNext(); + CopyableBucketIterator iterator(); /** - * The number of items in the bucket at the current iterator position. Does not advance the iterator. - * Must not be called if {@link #hasNext()} returns {@code false}. - * - * @return the number of items in the bucket, always greater than zero + * @return the highest populated bucket index, or an empty optional if no buckets are populated */ - long peekCount(); + OptionalLong maxBucketIndex(); /** - * The index of the bucket at the current iterator position. Does not advance the iterator. - * Must not be called if {@link #hasNext()} returns {@code false}. - * - * @return the index of the bucket, guaranteed to be in the range [{@link #MIN_INDEX}, {@link #MAX_INDEX}] + * @return the sum of the counts across all buckets of this range */ - long peekIndex(); + long valueCount(); - /** - * Moves the iterator to the next, non-empty bucket. - * If {@link #hasNext()} is {@code true} after calling {@link #advance()}, {@link #peekIndex()} is guaranteed to return a value - * greater than the value returned prior to the {@link #advance()} call. - */ - void advance(); - - /** - * Provides the scale that can be used to convert indices returned by {@link #peekIndex()} to the bucket boundaries, - * e.g., via {@link ExponentialScaleUtils#getLowerBucketBoundary(long, int)}. 
- * - * @return the scale, which is guaranteed to be constant over the lifetime of this iterator - */ - int scale(); } - /** - * A {@link BucketIterator} that can be copied. - */ - interface CopyableBucketIterator extends BucketIterator { - - /** - * Creates a copy of this bucket iterator, pointing at the same bucket of the same range of buckets. - * Calling {@link #advance()} on the copied iterator does not affect this instance and vice-versa. - * - * @return a copy of this iterator - */ - CopyableBucketIterator copy(); - } } diff --git a/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ExponentialHistogramMerger.java b/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ExponentialHistogramMerger.java index cfc90a6dc1414..e0a50d81bd549 100644 --- a/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ExponentialHistogramMerger.java +++ b/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ExponentialHistogramMerger.java @@ -9,8 +9,6 @@ package org.elasticsearch.exponentialhistogram; -import java.util.OptionalLong; - import static org.elasticsearch.exponentialhistogram.ExponentialScaleUtils.getMaximumScaleIncrease; /** @@ -81,10 +79,10 @@ private void doMerge(ExponentialHistogram b) { ExponentialHistogram a = result; - ExponentialHistogram.CopyableBucketIterator posBucketsA = a.positiveBuckets(); - ExponentialHistogram.CopyableBucketIterator negBucketsA = a.negativeBuckets(); - ExponentialHistogram.CopyableBucketIterator posBucketsB = b.positiveBuckets(); - ExponentialHistogram.CopyableBucketIterator negBucketsB = b.negativeBuckets(); + CopyableBucketIterator posBucketsA = a.positiveBuckets().iterator(); + CopyableBucketIterator negBucketsA = a.negativeBuckets().iterator(); + CopyableBucketIterator posBucketsB = b.positiveBuckets().iterator(); + CopyableBucketIterator negBucketsB = b.negativeBuckets().iterator(); ZeroBucket zeroBucket = 
a.zeroBucket().merge(b.zeroBucket()); zeroBucket = zeroBucket.collapseOverlappingBuckets(posBucketsA, negBucketsA, posBucketsB, negBucketsB); @@ -96,17 +94,17 @@ private void doMerge(ExponentialHistogram b) { // We need to ensure that we do not exceed MAX_INDEX / MIN_INDEX in this case. int targetScale = a.scale(); if (targetScale > b.scale()) { - if (posBucketsB.hasNext()) { - long smallestIndex = posBucketsB.peekIndex(); - targetScale = Math.min(targetScale, b.scale() + getMaximumScaleIncrease(smallestIndex)); - } if (negBucketsB.hasNext()) { long smallestIndex = negBucketsB.peekIndex(); - targetScale = Math.min(targetScale, b.scale() + getMaximumScaleIncrease(smallestIndex)); + long highestIndex = b.negativeBuckets().maxBucketIndex().getAsLong(); + int maxScaleIncrease = Math.min(getMaximumScaleIncrease(smallestIndex), getMaximumScaleIncrease(highestIndex)); + targetScale = Math.min(targetScale, b.scale() + maxScaleIncrease); } - OptionalLong maxIndex = b.maximumBucketIndex(); - if (maxIndex.isPresent()) { - targetScale = Math.min(targetScale, b.scale() + getMaximumScaleIncrease(maxIndex.getAsLong())); + if (posBucketsB.hasNext()) { + long smallestIndex = posBucketsB.peekIndex(); + long highestIndex = b.positiveBuckets().maxBucketIndex().getAsLong(); + int maxScaleIncrease = Math.min(getMaximumScaleIncrease(smallestIndex), getMaximumScaleIncrease(highestIndex)); + targetScale = Math.min(targetScale, b.scale() + maxScaleIncrease); } } @@ -143,7 +141,7 @@ private void doMerge(ExponentialHistogram b) { private static int putBuckets( FixedCapacityExponentialHistogram output, - ExponentialHistogram.BucketIterator buckets, + BucketIterator buckets, boolean isPositive, DownscaleStats downscaleStats ) { diff --git a/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ExponentialHistogramQuantile.java b/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ExponentialHistogramQuantile.java index 
0912299ddb307..ccb18da7b3e03 100644 --- a/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ExponentialHistogramQuantile.java +++ b/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ExponentialHistogramQuantile.java @@ -31,8 +31,8 @@ public static double getQuantile(ExponentialHistogram histo, double quantile) { } long zeroCount = histo.zeroBucket().count(); - long negCount = getTotalCount(histo.negativeBuckets()); - long posCount = getTotalCount(histo.positiveBuckets()); + long negCount = histo.negativeBuckets().valueCount(); + long posCount = histo.positiveBuckets().valueCount(); long totalCount = zeroCount + negCount + posCount; if (totalCount == 0) { @@ -56,15 +56,15 @@ public static double getQuantile(ExponentialHistogram histo, double quantile) { private static double getElementAtRank(ExponentialHistogram histo, long rank, long negCount, long zeroCount) { if (rank < negCount) { - return -getBucketMidpointForRank(histo.negativeBuckets(), (negCount - 1) - rank); + return -getBucketMidpointForRank(histo.negativeBuckets().iterator(), (negCount - 1) - rank); } else if (rank < (negCount + zeroCount)) { return 0.0; } else { - return getBucketMidpointForRank(histo.positiveBuckets(), rank - (negCount + zeroCount)); + return getBucketMidpointForRank(histo.positiveBuckets().iterator(), rank - (negCount + zeroCount)); } } - private static double getBucketMidpointForRank(ExponentialHistogram.BucketIterator buckets, long rank) { + private static double getBucketMidpointForRank(BucketIterator buckets, long rank) { long seenCount = 0; while (buckets.hasNext()) { seenCount += buckets.peekCount(); @@ -75,13 +75,4 @@ private static double getBucketMidpointForRank(ExponentialHistogram.BucketIterat } throw new IllegalStateException("The total number of elements in the buckets is less than the desired rank."); } - - private static long getTotalCount(ExponentialHistogram.BucketIterator buckets) { - long count = 0; - while 
(buckets.hasNext()) { - count += buckets.peekCount(); - buckets.advance(); - } - return count; - } } diff --git a/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/FixedCapacityExponentialHistogram.java b/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/FixedCapacityExponentialHistogram.java index 38ba66b7a2012..813d52398c91e 100644 --- a/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/FixedCapacityExponentialHistogram.java +++ b/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/FixedCapacityExponentialHistogram.java @@ -14,7 +14,7 @@ /** * An implementation of a mutable {@link ExponentialHistogram} with a sparse, array-backed representation. *
    - * Consumers must ensure that if the histogram is mutated, all previously acquired {@link ExponentialHistogram.BucketIterator} + * Consumers must ensure that if the histogram is mutated, all previously acquired {@link BucketIterator} * instances are no longer used. */ public final class FixedCapacityExponentialHistogram implements ExponentialHistogram { @@ -26,12 +26,24 @@ public final class FixedCapacityExponentialHistogram implements ExponentialHisto private final long[] bucketIndices; private final long[] bucketCounts; - private int negativeBucketCount; - private int positiveBucketCount; private int bucketScale; + private final AbstractBuckets negativeBuckets = new AbstractBuckets() { + @Override + int startSlot() { + return 0; + } + }; + private ZeroBucket zeroBucket; + private final AbstractBuckets positiveBuckets = new AbstractBuckets() { + @Override + int startSlot() { + return negativeBuckets.numBuckets; + } + }; + /** * Creates an empty histogram with the given capacity and a {@link ZeroBucket#minimalEmpty()} zero bucket. * The scale is initialized to the maximum possible precision ({@link #MAX_SCALE}). @@ -59,8 +71,8 @@ public void resetBuckets(int scale) { if (scale > MAX_SCALE || scale < MIN_SCALE) { throw new IllegalArgumentException("scale must be in range [" + MIN_SCALE + ".." + MAX_SCALE + "]"); } - negativeBucketCount = 0; - positiveBucketCount = 0; + negativeBuckets.reset(); + positiveBuckets.reset(); bucketScale = scale; } @@ -101,30 +113,17 @@ public boolean tryAddBucket(long index, long count, boolean isPositive) { if (index < MIN_INDEX || index > MAX_INDEX) { throw new IllegalArgumentException("index must be in range [" + MIN_INDEX + ".." 
+ MAX_INDEX + "]"); } - if (isPositive == false && positiveBucketCount > 0) { + if (isPositive == false && positiveBuckets.numBuckets > 0) { throw new IllegalArgumentException("Cannot add negative buckets after a positive bucket has been added"); } if (count <= 0) { throw new IllegalArgumentException("Cannot add an empty or negative bucket"); } - int slot = negativeBucketCount + positiveBucketCount; - if (slot >= bucketCounts.length) { - return false; // no more space - } - bucketIndices[slot] = index; - bucketCounts[slot] = count; if (isPositive) { - if (positiveBucketCount > 0 && bucketIndices[slot - 1] >= index) { - throw new IllegalStateException("Buckets must be added in strictly ascending index order"); - } - positiveBucketCount++; + return positiveBuckets.tryAddBucket(index, count); } else { - if (negativeBucketCount > 0 && bucketIndices[slot - 1] >= index) { - throw new IllegalStateException("Buckets must be added in strictly ascending index order"); - } - negativeBucketCount++; + return negativeBuckets.tryAddBucket(index, count); } - return true; } @Override @@ -133,25 +132,68 @@ public int scale() { } @Override - public CopyableBucketIterator negativeBuckets() { - return new BucketArrayIterator(0, negativeBucketCount); + public Buckets negativeBuckets() { + return negativeBuckets; } @Override - public OptionalLong maximumBucketIndex() { - long maxIndex = Long.MIN_VALUE; - if (negativeBucketCount > 0) { - maxIndex = bucketIndices[negativeBucketCount - 1]; + public Buckets positiveBuckets() { + return positiveBuckets; + } + + private abstract class AbstractBuckets implements Buckets { + + private int numBuckets; + private int cachedValueSumForNumBuckets; + private long cachedValueSum; + + AbstractBuckets() { + reset(); + } + + abstract int startSlot(); + + final void reset() { + numBuckets = 0; + cachedValueSumForNumBuckets = 0; + cachedValueSum = 0; } - if (positiveBucketCount > 0) { - maxIndex = Math.max(maxIndex, bucketIndices[negativeBucketCount + 
positiveBucketCount - 1]); + + boolean tryAddBucket(long index, long count) { + int slot = startSlot() + numBuckets; + if (slot >= bucketCounts.length) { + return false; // no more space + } + bucketIndices[slot] = index; + bucketCounts[slot] = count; + numBuckets++; + return true; } - return maxIndex == Long.MIN_VALUE ? OptionalLong.empty() : OptionalLong.of(maxIndex); - } - @Override - public CopyableBucketIterator positiveBuckets() { - return new BucketArrayIterator(negativeBucketCount, negativeBucketCount + positiveBucketCount); + @Override + public CopyableBucketIterator iterator() { + int start = startSlot(); + return new BucketArrayIterator(start, start + numBuckets); + } + + @Override + public OptionalLong maxBucketIndex() { + if (numBuckets == 0) { + return OptionalLong.empty(); + } else { + return OptionalLong.of(bucketIndices[startSlot() + numBuckets - 1]); + } + } + + @Override + public long valueCount() { + int startSlot = startSlot(); + while (cachedValueSumForNumBuckets < numBuckets) { + cachedValueSum += bucketCounts[startSlot + cachedValueSumForNumBuckets]; + cachedValueSumForNumBuckets++; + } + return cachedValueSum; + } } private class BucketArrayIterator implements CopyableBucketIterator { diff --git a/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/MergingBucketIterator.java b/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/MergingBucketIterator.java index def9a81952b0c..1ca660f62e879 100644 --- a/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/MergingBucketIterator.java +++ b/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/MergingBucketIterator.java @@ -12,10 +12,10 @@ /** * An iterator that merges two bucket iterators, aligning them to a common scale and combining buckets with the same index. 
*/ -final class MergingBucketIterator implements ExponentialHistogram.BucketIterator { +final class MergingBucketIterator implements BucketIterator { - private final ExponentialHistogram.BucketIterator itA; - private final ExponentialHistogram.BucketIterator itB; + private final BucketIterator itA; + private final BucketIterator itB; private boolean endReached; private long currentIndex; @@ -28,7 +28,7 @@ final class MergingBucketIterator implements ExponentialHistogram.BucketIterator * @param itB the second iterator to merge * @param targetScale the histogram scale to which both iterators should be aligned */ - MergingBucketIterator(ExponentialHistogram.BucketIterator itA, ExponentialHistogram.BucketIterator itB, int targetScale) { + MergingBucketIterator(BucketIterator itA, BucketIterator itB, int targetScale) { this.itA = new ScaleAdjustingBucketIterator(itA, targetScale); this.itB = new ScaleAdjustingBucketIterator(itB, targetScale); endReached = false; diff --git a/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ScaleAdjustingBucketIterator.java b/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ScaleAdjustingBucketIterator.java index 82f3141fe221f..54b4d2cb2b467 100644 --- a/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ScaleAdjustingBucketIterator.java +++ b/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ScaleAdjustingBucketIterator.java @@ -15,9 +15,9 @@ * An iterator that wraps another bucket iterator and adjusts its scale. * When scaling down, multiple buckets can collapse into a single one. This iterator ensures they are merged correctly. 
*/ -final class ScaleAdjustingBucketIterator implements ExponentialHistogram.BucketIterator { +final class ScaleAdjustingBucketIterator implements BucketIterator { - private final ExponentialHistogram.BucketIterator delegate; + private final BucketIterator delegate; private final int scaleAdjustment; private long currentIndex; @@ -30,7 +30,7 @@ final class ScaleAdjustingBucketIterator implements ExponentialHistogram.BucketI * @param delegate the iterator to wrap * @param targetScale the target scale for the new iterator */ - ScaleAdjustingBucketIterator(ExponentialHistogram.BucketIterator delegate, int targetScale) { + ScaleAdjustingBucketIterator(BucketIterator delegate, int targetScale) { this.delegate = delegate; scaleAdjustment = targetScale - delegate.scale(); hasNextValue = true; diff --git a/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ZeroBucket.java b/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ZeroBucket.java index b7df8f3673cd2..9379bb4a1e845 100644 --- a/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ZeroBucket.java +++ b/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ZeroBucket.java @@ -91,12 +91,12 @@ public ZeroBucket merge(ZeroBucket other) { * @param bucketIterators The iterators whose buckets may be collapsed. * @return A potentially updated {@link ZeroBucket} with the collapsed buckets' counts and an adjusted threshold. */ - public ZeroBucket collapseOverlappingBuckets(ExponentialHistogram.BucketIterator... bucketIterators) { + public ZeroBucket collapseOverlappingBuckets(BucketIterator... 
bucketIterators) { ZeroBucket current = this; ZeroBucket previous; do { previous = current; - for (ExponentialHistogram.BucketIterator buckets : bucketIterators) { + for (BucketIterator buckets : bucketIterators) { current = current.collapseOverlappingBuckets(buckets); } } while (previous.compareZeroThreshold(current) != 0); @@ -127,7 +127,7 @@ public double zeroThreshold() { * @param buckets The iterator whose buckets may be collapsed. * @return A potentially updated {@link ZeroBucket} with the collapsed buckets' counts and an adjusted threshold. */ - public ZeroBucket collapseOverlappingBuckets(ExponentialHistogram.BucketIterator buckets) { + public ZeroBucket collapseOverlappingBuckets(BucketIterator buckets) { long collapsedCount = 0; long highestCollapsedIndex = 0; diff --git a/libs/exponential-histogram/src/test/java/org/elasticsearch/exponentialhistogram/ExponentialHistogramGeneratorTests.java b/libs/exponential-histogram/src/test/java/org/elasticsearch/exponentialhistogram/ExponentialHistogramGeneratorTests.java index f0c13b3190aca..23ace7e861093 100644 --- a/libs/exponential-histogram/src/test/java/org/elasticsearch/exponentialhistogram/ExponentialHistogramGeneratorTests.java +++ b/libs/exponential-histogram/src/test/java/org/elasticsearch/exponentialhistogram/ExponentialHistogramGeneratorTests.java @@ -20,7 +20,7 @@ public void testVeryLargeValue() { double value = Double.MAX_VALUE / 10; ExponentialHistogram histo = ExponentialHistogramGenerator.createFor(value); - long index = histo.positiveBuckets().peekIndex(); + long index = histo.positiveBuckets().iterator().peekIndex(); int scale = histo.scale(); double lowerBound = ExponentialScaleUtils.getLowerBucketBoundary(index, scale); diff --git a/libs/exponential-histogram/src/test/java/org/elasticsearch/exponentialhistogram/ExponentialHistogramMergerTests.java b/libs/exponential-histogram/src/test/java/org/elasticsearch/exponentialhistogram/ExponentialHistogramMergerTests.java index 
75d4a974fcc7a..9d53f8d477c5a 100644 --- a/libs/exponential-histogram/src/test/java/org/elasticsearch/exponentialhistogram/ExponentialHistogramMergerTests.java +++ b/libs/exponential-histogram/src/test/java/org/elasticsearch/exponentialhistogram/ExponentialHistogramMergerTests.java @@ -48,13 +48,13 @@ public void testZeroThresholdCollapsesOverlappingBuckets() { // only the (4, 8] bucket should be left assertThat(mergeResult.scale(), equalTo(0)); - ExponentialHistogram.BucketIterator negBuckets = mergeResult.negativeBuckets(); + BucketIterator negBuckets = mergeResult.negativeBuckets().iterator(); assertThat(negBuckets.peekIndex(), equalTo(2L)); assertThat(negBuckets.peekCount(), equalTo(7L)); negBuckets.advance(); assertThat(negBuckets.hasNext(), equalTo(false)); - ExponentialHistogram.BucketIterator posBuckets = mergeResult.positiveBuckets(); + BucketIterator posBuckets = mergeResult.positiveBuckets().iterator(); assertThat(posBuckets.peekIndex(), equalTo(2L)); assertThat(posBuckets.peekCount(), equalTo(42L)); posBuckets.advance(); @@ -67,8 +67,8 @@ public void testZeroThresholdCollapsesOverlappingBuckets() { mergeResult = mergeWithMinimumScale(100, 0, mergeResult, third); assertThat(mergeResult.zeroBucket().zeroThreshold(), closeTo(45.0, 0.000001)); assertThat(mergeResult.zeroBucket().count(), equalTo(1L + 14L + 42L + 7L)); - assertThat(mergeResult.positiveBuckets().hasNext(), equalTo(false)); - assertThat(mergeResult.negativeBuckets().hasNext(), equalTo(false)); + assertThat(mergeResult.positiveBuckets().iterator().hasNext(), equalTo(false)); + assertThat(mergeResult.negativeBuckets().iterator().hasNext(), equalTo(false)); } public void testEmptyZeroBucketIgnored() { @@ -85,7 +85,7 @@ public void testEmptyZeroBucketIgnored() { assertThat(mergeResult.zeroBucket().zeroThreshold(), equalTo(2.0)); assertThat(mergeResult.zeroBucket().count(), equalTo(10L)); - ExponentialHistogram.BucketIterator posBuckets = mergeResult.positiveBuckets(); + BucketIterator posBuckets = 
mergeResult.positiveBuckets().iterator(); assertThat(posBuckets.peekIndex(), equalTo(2L)); assertThat(posBuckets.peekCount(), equalTo(42L)); posBuckets.advance(); @@ -111,9 +111,9 @@ public void testUpscalingDoesNotExceedIndexLimits() { assertThat(result.scale(), equalTo(21)); if (isPositive) { - assertThat(result.positiveBuckets().peekIndex(), equalTo(adjustScale(index, 20, 1))); + assertThat(result.positiveBuckets().iterator().peekIndex(), equalTo(adjustScale(index, 20, 1))); } else { - assertThat(result.negativeBuckets().peekIndex(), equalTo(adjustScale(index, 20, 1))); + assertThat(result.negativeBuckets().iterator().peekIndex(), equalTo(adjustScale(index, 20, 1))); } } } @@ -142,8 +142,10 @@ public void testMergeOrderIndependence() { } } - private void assertBucketsEqual(ExponentialHistogram.BucketIterator itA, ExponentialHistogram.BucketIterator itB) { - assertThat("Expecting both set of buckets to be emptry or non-empty", itA.hasNext(), equalTo(itB.hasNext())); + private void assertBucketsEqual(ExponentialHistogram.Buckets bucketsA, ExponentialHistogram.Buckets bucketsB) { + BucketIterator itA = bucketsA.iterator(); + BucketIterator itB = bucketsB.iterator(); + assertThat("Expecting both set of buckets to be empty or non-empty", itA.hasNext(), equalTo(itB.hasNext())); while (itA.hasNext() && itB.hasNext()) { assertThat(itA.peekIndex(), equalTo(itB.peekIndex())); assertThat(itA.peekCount(), equalTo(itB.peekCount())); From 6100bc6bb843979190197502ffb35098d5bfa7f5 Mon Sep 17 00:00:00 2001 From: Jonas Kunz Date: Thu, 17 Jul 2025 12:25:08 +0200 Subject: [PATCH 26/32] Add test case for quantile in zero-bucket --- .../exponentialhistogram/QuantileAccuracyTests.java | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/libs/exponential-histogram/src/test/java/org/elasticsearch/exponentialhistogram/QuantileAccuracyTests.java b/libs/exponential-histogram/src/test/java/org/elasticsearch/exponentialhistogram/QuantileAccuracyTests.java index 
4be298cf10aaf..9375443c3cbd4 100644 --- a/libs/exponential-histogram/src/test/java/org/elasticsearch/exponentialhistogram/QuantileAccuracyTests.java +++ b/libs/exponential-histogram/src/test/java/org/elasticsearch/exponentialhistogram/QuantileAccuracyTests.java @@ -27,6 +27,7 @@ import java.util.stream.IntStream; import static org.hamcrest.Matchers.closeTo; +import static org.hamcrest.Matchers.equalTo; import static org.hamcrest.Matchers.lessThan; import static org.hamcrest.Matchers.notANumber; @@ -68,6 +69,13 @@ public void testBasicSmall() { assertThat(maxError, lessThan(0.000001)); } + public void testPercentileOverlapsZeroBucket() { + ExponentialHistogram histo = ExponentialHistogramGenerator.createFor(-1, 0, 1); + assertThat(ExponentialHistogramQuantile.getQuantile(histo, 0.5), equalTo(0.0)); + assertThat(ExponentialHistogramQuantile.getQuantile(histo, 0.375), closeTo(-0.5, 0.000001)); + assertThat(ExponentialHistogramQuantile.getQuantile(histo, 0.625), closeTo(0.5, 0.000001)); + } + public void testBigJump() { double[] values = DoubleStream.concat(IntStream.range(0, 18).mapToDouble(Double::valueOf), DoubleStream.of(1_000_000.0)).toArray(); From 7c99c81d4d19313ea2b22e2b9d0871f74fc1a820 Mon Sep 17 00:00:00 2001 From: Jonas Kunz Date: Thu, 17 Jul 2025 12:27:08 +0200 Subject: [PATCH 27/32] Add more perecentiles for testing --- .../exponentialhistogram/QuantileAccuracyTests.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libs/exponential-histogram/src/test/java/org/elasticsearch/exponentialhistogram/QuantileAccuracyTests.java b/libs/exponential-histogram/src/test/java/org/elasticsearch/exponentialhistogram/QuantileAccuracyTests.java index 9375443c3cbd4..7bd28b250205f 100644 --- a/libs/exponential-histogram/src/test/java/org/elasticsearch/exponentialhistogram/QuantileAccuracyTests.java +++ b/libs/exponential-histogram/src/test/java/org/elasticsearch/exponentialhistogram/QuantileAccuracyTests.java @@ -33,7 +33,7 @@ public class 
QuantileAccuracyTests extends ESTestCase { - public static final double[] QUANTILES_TO_TEST = { 0, 0.01, 0.1, 0.25, 0.5, 0.75, 0.9, 0.95, 0.99, 1.0 }; + public static final double[] QUANTILES_TO_TEST = { 0, 0.0000001, 0.01, 0.1, 0.25, 0.5, 0.75, 0.9, 0.95, 0.99, 0.999999, 1.0 }; public void testUniformDistribution() { testDistributionQuantileAccuracy(new UniformRealDistribution(new Well19937c(42), 0, 100), 50000, 500); From b308838b9c137d7c98dca7ae7f0212dcd4087858 Mon Sep 17 00:00:00 2001 From: Jonas Kunz Date: Thu, 17 Jul 2025 13:05:37 +0200 Subject: [PATCH 28/32] Improved quantile algorithm to only iterate once over the buckets --- .../ExponentialHistogramQuantile.java | 86 +++++++++++++++---- .../QuantileAccuracyTests.java | 10 ++- 2 files changed, 77 insertions(+), 19 deletions(-) diff --git a/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ExponentialHistogramQuantile.java b/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ExponentialHistogramQuantile.java index ccb18da7b3e03..927fd47f8ccf4 100644 --- a/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ExponentialHistogramQuantile.java +++ b/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ExponentialHistogramQuantile.java @@ -45,32 +45,88 @@ public static double getQuantile(ExponentialHistogram histo, double quantile) { long upperRank = (long) Math.ceil(exactRank); double upperFactor = exactRank - lowerRank; - // TODO: This can be optimized to iterate over the buckets once instead of twice. 
- return getElementAtRank(histo, lowerRank, negCount, zeroCount) * (1 - upperFactor) + getElementAtRank( - histo, - upperRank, - negCount, - zeroCount - ) * upperFactor; + ValueAndPreviousValue values = getElementAtRank(histo, upperRank); + + if (lowerRank == upperRank) { + return values.valueAtRank(); + } else { + return values.valueAtPreviousRank() * (1 - upperFactor) + values.valueAtRank() * upperFactor; + } } - private static double getElementAtRank(ExponentialHistogram histo, long rank, long negCount, long zeroCount) { - if (rank < negCount) { - return -getBucketMidpointForRank(histo.negativeBuckets().iterator(), (negCount - 1) - rank); - } else if (rank < (negCount + zeroCount)) { - return 0.0; + /** + * @param valueAtPreviousRank the value at the rank before the desired rank, NaN if not applicable. + * @param valueAtRank the value at the desired rank + */ + private record ValueAndPreviousValue(double valueAtPreviousRank, double valueAtRank + ) { + ValueAndPreviousValue negateAndSwap() { + return new ValueAndPreviousValue(-valueAtRank, -valueAtPreviousRank); + } + } + + private static ValueAndPreviousValue getElementAtRank(ExponentialHistogram histo, long rank) { + long negativeValuesCount = histo.negativeBuckets().valueCount(); + long zeroCount = histo.zeroBucket().count(); + if (rank < negativeValuesCount) { + if (negativeValuesCount == 1) { + return new ValueAndPreviousValue(Double.NaN, -getFirstBucketMidpoint(histo.negativeBuckets())); + } else { + return getBucketMidpointForRank(histo.negativeBuckets().iterator(), negativeValuesCount - rank - 1).negateAndSwap(); + } + } else if (rank < (negativeValuesCount + zeroCount)) { + if (rank == negativeValuesCount) { + // the element at the previous rank falls into the negative bucket range + return new ValueAndPreviousValue(-getFirstBucketMidpoint(histo.negativeBuckets()), 0.0); + } else { + return new ValueAndPreviousValue(0.0, 0.0); + } + } else { + ValueAndPreviousValue result = 
getBucketMidpointForRank(histo.positiveBuckets().iterator(), rank - negativeValuesCount - zeroCount); + if ( (rank-1) < negativeValuesCount) { + // previous value falls into the negative bucket range or is -1 + return new ValueAndPreviousValue(-getFirstBucketMidpoint(histo.negativeBuckets()), result.valueAtRank); + } else if ( (rank-1) < (negativeValuesCount + zeroCount) ) { + // previous value falls into the zero bucket + return new ValueAndPreviousValue(0.0, result.valueAtRank); + } else { + return result; + } + } + } + + private static double getFirstBucketMidpoint(ExponentialHistogram.Buckets buckets) { + CopyableBucketIterator iterator = buckets.iterator(); + if (iterator.hasNext()) { + return ExponentialScaleUtils.getPointOfLeastRelativeError(iterator.peekIndex(), iterator.scale()); } else { - return getBucketMidpointForRank(histo.positiveBuckets().iterator(), rank - (negCount + zeroCount)); + return Double.NaN; } } - private static double getBucketMidpointForRank(BucketIterator buckets, long rank) { + private static ValueAndPreviousValue getBucketMidpointForRank(BucketIterator buckets, long rank) { + long prevIndex = Long.MIN_VALUE; long seenCount = 0; while (buckets.hasNext()) { seenCount += buckets.peekCount(); if (rank < seenCount) { - return ExponentialScaleUtils.getPointOfLeastRelativeError(buckets.peekIndex(), buckets.scale()); + double center = ExponentialScaleUtils.getPointOfLeastRelativeError(buckets.peekIndex(), buckets.scale()); + double prevCenter; + if (rank > 0) { + if (buckets.peekCount() > 1) { + // element at previous rank is in same bucket + prevCenter = center; + } else { + // element at previous rank is in the previous bucket + prevCenter = ExponentialScaleUtils.getPointOfLeastRelativeError(prevIndex, buckets.scale()); + } + } else { + // there is no previous element + prevCenter = Double.NaN; + } + return new ValueAndPreviousValue(prevCenter, center); } + prevIndex = buckets.peekIndex(); buckets.advance(); } throw new 
IllegalStateException("The total number of elements in the buckets is less than the desired rank."); diff --git a/libs/exponential-histogram/src/test/java/org/elasticsearch/exponentialhistogram/QuantileAccuracyTests.java b/libs/exponential-histogram/src/test/java/org/elasticsearch/exponentialhistogram/QuantileAccuracyTests.java index 7bd28b250205f..dc677f3aebf42 100644 --- a/libs/exponential-histogram/src/test/java/org/elasticsearch/exponentialhistogram/QuantileAccuracyTests.java +++ b/libs/exponential-histogram/src/test/java/org/elasticsearch/exponentialhistogram/QuantileAccuracyTests.java @@ -70,10 +70,12 @@ public void testBasicSmall() { } public void testPercentileOverlapsZeroBucket() { - ExponentialHistogram histo = ExponentialHistogramGenerator.createFor(-1, 0, 1); - assertThat(ExponentialHistogramQuantile.getQuantile(histo, 0.5), equalTo(0.0)); - assertThat(ExponentialHistogramQuantile.getQuantile(histo, 0.375), closeTo(-0.5, 0.000001)); - assertThat(ExponentialHistogramQuantile.getQuantile(histo, 0.625), closeTo(0.5, 0.000001)); + ExponentialHistogram histo = ExponentialHistogramGenerator.createFor(-2,-1, 0, 0, 0, 1, 1); + assertThat(ExponentialHistogramQuantile.getQuantile(histo, 8.0 / 16.0), equalTo(0.0)); + assertThat(ExponentialHistogramQuantile.getQuantile(histo, 7.0 / 16.0), equalTo(0.0)); + assertThat(ExponentialHistogramQuantile.getQuantile(histo, 9.0 / 16.0), equalTo(0.0)); + assertThat(ExponentialHistogramQuantile.getQuantile(histo, 5.0 / 16.0), closeTo(-0.5, 0.000001)); + assertThat(ExponentialHistogramQuantile.getQuantile(histo, 11.0 / 16.0), closeTo(0.5, 0.000001)); } public void testBigJump() { From 311f44b00fcbf3620b300f283479c940290578ce Mon Sep 17 00:00:00 2001 From: Jonas Kunz Date: Fri, 18 Jul 2025 09:02:01 +0200 Subject: [PATCH 29/32] Fix quantile computation and error bound in tests --- .../ExponentialHistogramQuantile.java | 31 ++-- .../QuantileAccuracyTests.java | 142 ++++++++---------- 2 files changed, 84 insertions(+), 89 
deletions(-) diff --git a/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ExponentialHistogramQuantile.java b/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ExponentialHistogramQuantile.java index 927fd47f8ccf4..f9cdb7761f070 100644 --- a/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ExponentialHistogramQuantile.java +++ b/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ExponentialHistogramQuantile.java @@ -9,6 +9,8 @@ package org.elasticsearch.exponentialhistogram; +import java.util.OptionalLong; + /** * Provides quantile estimation for {@link ExponentialHistogram} instances. */ @@ -58,8 +60,7 @@ public static double getQuantile(ExponentialHistogram histo, double quantile) { * @param valueAtPreviousRank the value at the rank before the desired rank, NaN if not applicable. * @param valueAtRank the value at the desired rank */ - private record ValueAndPreviousValue(double valueAtPreviousRank, double valueAtRank - ) { + private record ValueAndPreviousValue(double valueAtPreviousRank, double valueAtRank) { ValueAndPreviousValue negateAndSwap() { return new ValueAndPreviousValue(-valueAtRank, -valueAtPreviousRank); } @@ -69,10 +70,10 @@ private static ValueAndPreviousValue getElementAtRank(ExponentialHistogram histo long negativeValuesCount = histo.negativeBuckets().valueCount(); long zeroCount = histo.zeroBucket().count(); if (rank < negativeValuesCount) { - if (negativeValuesCount == 1) { - return new ValueAndPreviousValue(Double.NaN, -getFirstBucketMidpoint(histo.negativeBuckets())); + if (rank == 0) { + return new ValueAndPreviousValue(Double.NaN, -getLastBucketMidpoint(histo.negativeBuckets())); } else { - return getBucketMidpointForRank(histo.negativeBuckets().iterator(), negativeValuesCount - rank - 1).negateAndSwap(); + return getBucketMidpointForRank(histo.negativeBuckets().iterator(), negativeValuesCount - rank).negateAndSwap(); } 
} else if (rank < (negativeValuesCount + zeroCount)) { if (rank == negativeValuesCount) { @@ -82,11 +83,14 @@ private static ValueAndPreviousValue getElementAtRank(ExponentialHistogram histo return new ValueAndPreviousValue(0.0, 0.0); } } else { - ValueAndPreviousValue result = getBucketMidpointForRank(histo.positiveBuckets().iterator(), rank - negativeValuesCount - zeroCount); - if ( (rank-1) < negativeValuesCount) { + ValueAndPreviousValue result = getBucketMidpointForRank( + histo.positiveBuckets().iterator(), + rank - negativeValuesCount - zeroCount + ); + if ((rank - 1) < negativeValuesCount) { // previous value falls into the negative bucket range or is -1 return new ValueAndPreviousValue(-getFirstBucketMidpoint(histo.negativeBuckets()), result.valueAtRank); - } else if ( (rank-1) < (negativeValuesCount + zeroCount) ) { + } else if ((rank - 1) < (negativeValuesCount + zeroCount)) { // previous value falls into the zero bucket return new ValueAndPreviousValue(0.0, result.valueAtRank); } else { @@ -104,6 +108,15 @@ private static double getFirstBucketMidpoint(ExponentialHistogram.Buckets bucket } } + private static double getLastBucketMidpoint(ExponentialHistogram.Buckets buckets) { + OptionalLong highestIndex = buckets.maxBucketIndex(); + if (highestIndex.isPresent()) { + return ExponentialScaleUtils.getPointOfLeastRelativeError(highestIndex.getAsLong(), buckets.iterator().scale()); + } else { + return Double.NaN; + } + } + private static ValueAndPreviousValue getBucketMidpointForRank(BucketIterator buckets, long rank) { long prevIndex = Long.MIN_VALUE; long seenCount = 0; @@ -113,7 +126,7 @@ private static ValueAndPreviousValue getBucketMidpointForRank(BucketIterator buc double center = ExponentialScaleUtils.getPointOfLeastRelativeError(buckets.peekIndex(), buckets.scale()); double prevCenter; if (rank > 0) { - if (buckets.peekCount() > 1) { + if ((rank - 1) >= (seenCount - buckets.peekCount())) { // element at previous rank is in same bucket prevCenter = 
center; } else { diff --git a/libs/exponential-histogram/src/test/java/org/elasticsearch/exponentialhistogram/QuantileAccuracyTests.java b/libs/exponential-histogram/src/test/java/org/elasticsearch/exponentialhistogram/QuantileAccuracyTests.java index dc677f3aebf42..7b528f2592a8e 100644 --- a/libs/exponential-histogram/src/test/java/org/elasticsearch/exponentialhistogram/QuantileAccuracyTests.java +++ b/libs/exponential-histogram/src/test/java/org/elasticsearch/exponentialhistogram/QuantileAccuracyTests.java @@ -22,45 +22,54 @@ import org.elasticsearch.test.ESTestCase; import java.util.Arrays; -import java.util.Random; +import java.util.HashSet; +import java.util.Locale; import java.util.stream.DoubleStream; import java.util.stream.IntStream; +import static org.elasticsearch.exponentialhistogram.ExponentialHistogram.MAX_SCALE; +import static org.elasticsearch.exponentialhistogram.ExponentialScaleUtils.computeIndex; import static org.hamcrest.Matchers.closeTo; import static org.hamcrest.Matchers.equalTo; import static org.hamcrest.Matchers.lessThan; +import static org.hamcrest.Matchers.lessThanOrEqualTo; import static org.hamcrest.Matchers.notANumber; public class QuantileAccuracyTests extends ESTestCase { public static final double[] QUANTILES_TO_TEST = { 0, 0.0000001, 0.01, 0.1, 0.25, 0.5, 0.75, 0.9, 0.95, 0.99, 0.999999, 1.0 }; + private static int randomBucketCount() { + // exponentially distribute the bucket count to test more for smaller sizes + return (int) Math.round(5 + Math.pow(1995, randomDouble())); + } + public void testUniformDistribution() { - testDistributionQuantileAccuracy(new UniformRealDistribution(new Well19937c(42), 0, 100), 50000, 500); + testDistributionQuantileAccuracy(new UniformRealDistribution(new Well19937c(randomInt()), 0, 100)); } public void testNormalDistribution() { - testDistributionQuantileAccuracy(new NormalDistribution(new Well19937c(42), 100, 15), 50000, 500); + testDistributionQuantileAccuracy(new NormalDistribution(new 
Well19937c(randomInt()), 100, 15)); } public void testExponentialDistribution() { - testDistributionQuantileAccuracy(new ExponentialDistribution(new Well19937c(42), 10), 50000, 500); + testDistributionQuantileAccuracy(new ExponentialDistribution(new Well19937c(randomInt()), 10)); } public void testLogNormalDistribution() { - testDistributionQuantileAccuracy(new LogNormalDistribution(new Well19937c(42), 0, 1), 50000, 500); + testDistributionQuantileAccuracy(new LogNormalDistribution(new Well19937c(randomInt()), 0, 1)); } public void testGammaDistribution() { - testDistributionQuantileAccuracy(new GammaDistribution(new Well19937c(42), 2, 5), 50000, 500); + testDistributionQuantileAccuracy(new GammaDistribution(new Well19937c(randomInt()), 2, 5)); } public void testBetaDistribution() { - testDistributionQuantileAccuracy(new BetaDistribution(new Well19937c(42), 2, 5), 50000, 500); + testDistributionQuantileAccuracy(new BetaDistribution(new Well19937c(randomInt()), 2, 5)); } public void testWeibullDistribution() { - testDistributionQuantileAccuracy(new WeibullDistribution(new Well19937c(42), 2, 5), 50000, 500); + testDistributionQuantileAccuracy(new WeibullDistribution(new Well19937c(randomInt()), 2, 5)); } public void testBasicSmall() { @@ -70,7 +79,7 @@ public void testBasicSmall() { } public void testPercentileOverlapsZeroBucket() { - ExponentialHistogram histo = ExponentialHistogramGenerator.createFor(-2,-1, 0, 0, 0, 1, 1); + ExponentialHistogram histo = ExponentialHistogramGenerator.createFor(-2, -1, 0, 0, 0, 1, 1); assertThat(ExponentialHistogramQuantile.getQuantile(histo, 8.0 / 16.0), equalTo(0.0)); assertThat(ExponentialHistogramQuantile.getQuantile(histo, 7.0 / 16.0), equalTo(0.0)); assertThat(ExponentialHistogramQuantile.getQuantile(histo, 9.0 / 16.0), equalTo(0.0)); @@ -138,28 +147,20 @@ public void testSingleValueHistogram() { } public void testBucketCountImpact() { - RealDistribution distribution = new LogNormalDistribution(new Well19937c(42), 0, 1); - int 
sampleSize = 50000; + RealDistribution distribution = new LogNormalDistribution(new Well19937c(randomInt()), 0, 1); + int sampleSize = between(100, 50_000); double[] values = generateSamples(distribution, sampleSize); - // Test with different bucket counts - int[] bucketCounts = { 10, 50, 100, 200, 500 }; - for (int bucketCount : bucketCounts) { - double maxError = testQuantileAccuracy(values, bucketCount); - logger.info("Bucket count: " + bucketCount + ", Max relative error: " + maxError); - } - // Verify that more buckets generally means better accuracy double errorWithFewBuckets = testQuantileAccuracy(values, 20); double errorWithManyBuckets = testQuantileAccuracy(values, 200); - assertThat("More buckets should improve accuracy", errorWithManyBuckets, lessThan(errorWithFewBuckets)); + assertThat("More buckets should improve accuracy", errorWithManyBuckets, lessThanOrEqualTo(errorWithFewBuckets)); } public void testMixedSignValues() { - Random random = new Random(42); - double[] values = new double[10000]; + double[] values = new double[between(100, 10_000)]; for (int i = 0; i < values.length; i++) { - values[i] = (random.nextDouble() * 200) - 100; // Range from -100 to 100 + values[i] = (randomDouble() * 200) - 100; // Range from -100 to 100 } testQuantileAccuracy(values, 100); @@ -167,15 +168,14 @@ public void testMixedSignValues() { public void testSkewedData() { // Create a highly skewed dataset - Random random = new Random(42); double[] values = new double[10000]; for (int i = 0; i < values.length; i++) { - if (random.nextDouble() < 0.9) { + if (randomDouble() < 0.9) { // 90% of values are small - values[i] = random.nextDouble() * 10; + values[i] = randomDouble() * 10; } else { // 10% are very large - values[i] = random.nextDouble() * 10000 + 100; + values[i] = randomDouble() * 10000 + 100; } } @@ -183,22 +183,22 @@ public void testSkewedData() { } public void testDataWithZeros() { - Random random = new Random(42); double[] values = new double[10000]; for 
(int i = 0; i < values.length; i++) { - if (random.nextDouble() < 0.2) { + if (randomDouble() < 0.2) { // 20% zeros values[i] = 0; } else { - values[i] = random.nextDouble() * 100; + values[i] = randomDouble() * 100; } } testQuantileAccuracy(values, 100); } - private void testDistributionQuantileAccuracy(RealDistribution distribution, int sampleSize, int bucketCount) { - double[] values = generateSamples(distribution, sampleSize); + private void testDistributionQuantileAccuracy(RealDistribution distribution) { + double[] values = generateSamples(distribution, between(100, 50_000)); + int bucketCount = randomBucketCount(); testQuantileAccuracy(values, bucketCount); } @@ -213,6 +213,7 @@ private static double[] generateSamples(RealDistribution distribution, int sampl private double testQuantileAccuracy(double[] values, int bucketCount) { // Create histogram ExponentialHistogram histogram = ExponentialHistogramGenerator.createFor(bucketCount, Arrays.stream(values)); + Arrays.sort(values); // Calculate exact percentiles Percentile exactPercentile = new Percentile(); @@ -229,31 +230,28 @@ private double testQuantileAccuracy(double[] values, int bucketCount) { } else if (q == 1) { exactValue = Arrays.stream(values).max().getAsDouble(); } else { + double lower = values[Math.clamp((int) (Math.floor((values.length + 1) * q) - 1), 0, values.length - 1)]; + double upper = values[Math.clamp((int) (Math.ceil((values.length + 1) * q) - 1), 0, values.length - 1)]; + if (lower < 0 && upper > 0) { + // the percentile lies directly between a sign change and we interpolate linearly in-between + // in this case the relative error bound does not hold + continue; + } exactValue = exactPercentile.evaluate(q * 100); } + double histoValue = ExponentialHistogramQuantile.getQuantile(histogram, q); - // Skip comparison if exact value is zero to avoid division by zero - if (Math.abs(exactValue) < 1e-10) { + // Skip comparison if exact value is close to zero to avoid false-positives due to 
numerical imprecision + if (Math.abs(exactValue) < 1e-100) { continue; } double relativeError = Math.abs(histoValue - exactValue) / Math.abs(exactValue); maxError = Math.max(maxError, relativeError); - logger.info( - String.format( - "Quantile %.2f: Exact=%.6f, Histogram=%.6f, Relative Error=%.8f, Allowed Relative Error=%.8f", - q, - exactValue, - histoValue, - relativeError, - allowedError - ) - ); - assertThat( - String.format("Quantile %.2f should be accurate within %.6f%% relative error", q, allowedError * 100), + String.format(Locale.ENGLISH, "Quantile %.2f should be accurate within %.6f%% relative error", q, allowedError * 100), histoValue, closeTo(exactValue, Math.abs(exactValue * allowedError)) ); @@ -268,48 +266,32 @@ private double testQuantileAccuracy(double[] values, int bucketCount) { * This is an implementation of the error bound computation proven by Theorem 3 in the UDDSketch paper */ private static double getMaximumRelativeError(double[] values, int bucketCount) { - double smallestAbsNegative = Double.MAX_VALUE; - double largestAbsNegative = 0; - double smallestPositive = Double.MAX_VALUE; - double largestPositive = 0; - + HashSet usedPositiveIndices = new HashSet<>(); + HashSet usedNegativeIndices = new HashSet<>(); + int bestPossibleScale = MAX_SCALE; for (double value : values) { if (value < 0) { - smallestAbsNegative = Math.min(-value, smallestAbsNegative); - largestAbsNegative = Math.max(-value, largestAbsNegative); + usedPositiveIndices.add(computeIndex(value, bestPossibleScale)); } else if (value > 0) { - smallestPositive = Math.min(value, smallestPositive); - largestPositive = Math.max(value, largestPositive); + usedNegativeIndices.add(computeIndex(value, bestPossibleScale)); + } + while ((usedNegativeIndices.size() + usedPositiveIndices.size()) > bucketCount) { + usedNegativeIndices = rightShiftAll(usedNegativeIndices); + usedPositiveIndices = rightShiftAll(usedPositiveIndices); + bestPossibleScale--; } } + // for the best possible scale, 
compute the worst-case error + double base = Math.pow(2.0, Math.scalb(1.0, -bestPossibleScale)); + return 2 * base / (1 + base) - 1; + } - // Our algorithm is designed to optimally distribute the bucket budget across the positive and negative range - // therefore we simply try all variations here and assume the smallest possible error - - if (largestAbsNegative == 0) { - // only positive values - double gammaSquare = Math.pow(largestPositive / smallestPositive, 2.0 / (bucketCount)); - return (gammaSquare - 1) / (gammaSquare + 1); - } else if (smallestAbsNegative == 0) { - // only negative values - double gammaSquare = Math.pow(largestAbsNegative / smallestAbsNegative, 2.0 / (bucketCount)); - return (gammaSquare - 1) / (gammaSquare + 1); - } else { - double smallestError = Double.MAX_VALUE; - for (int positiveBuckets = 1; positiveBuckets < bucketCount - 1; positiveBuckets++) { - int negativeBuckets = bucketCount - positiveBuckets; - - double gammaSquareNeg = Math.pow(largestAbsNegative / smallestAbsNegative, 2.0 / (negativeBuckets)); - double errorNeg = (gammaSquareNeg - 1) / (gammaSquareNeg + 1); - - double gammaSquarePos = Math.pow(largestAbsNegative / smallestAbsNegative, 2.0 / (positiveBuckets)); - double errorPos = (gammaSquarePos - 1) / (gammaSquarePos + 1); - - double error = Math.max(errorNeg, errorPos); - smallestError = Math.min(smallestError, error); - } - return smallestError; + private static HashSet rightShiftAll(HashSet indices) { + HashSet result = new HashSet<>(); + for (long index : indices) { + result.add(index >> 1); } + return result; } } From 54fd41d180397e217070d80ad20d1f51df8c6da1 Mon Sep 17 00:00:00 2001 From: Jonas Kunz Date: Fri, 18 Jul 2025 12:18:41 +0200 Subject: [PATCH 30/32] Update randomization in remaining tests --- .../DownscaleStatsTests.java | 2 +- .../ExponentialHistogramMergerTests.java | 7 +-- .../ExponentialScaleUtilsTests.java | 32 +++++-------- ...ixedCapacityExponentialHistogramTests.java | 48 +++++++++++++++++++ 4 files 
changed, 63 insertions(+), 26 deletions(-) create mode 100644 libs/exponential-histogram/src/test/java/org/elasticsearch/exponentialhistogram/FixedCapacityExponentialHistogramTests.java diff --git a/libs/exponential-histogram/src/test/java/org/elasticsearch/exponentialhistogram/DownscaleStatsTests.java b/libs/exponential-histogram/src/test/java/org/elasticsearch/exponentialhistogram/DownscaleStatsTests.java index d6428a198b2fa..86a32388c8f1b 100644 --- a/libs/exponential-histogram/src/test/java/org/elasticsearch/exponentialhistogram/DownscaleStatsTests.java +++ b/libs/exponential-histogram/src/test/java/org/elasticsearch/exponentialhistogram/DownscaleStatsTests.java @@ -37,7 +37,7 @@ public void testNumericalLimits() { public void testRandom() { for (int i = 0; i < 100; i++) { - List values = IntStream.range(0, 1000).mapToObj(j -> random().nextLong(MIN_INDEX, MAX_INDEX + 1)).distinct().toList(); + List values = IntStream.range(0, 1000).mapToObj(j -> randomLongBetween(MIN_INDEX, MAX_INDEX)).distinct().toList(); verifyFor(values); } } diff --git a/libs/exponential-histogram/src/test/java/org/elasticsearch/exponentialhistogram/ExponentialHistogramMergerTests.java b/libs/exponential-histogram/src/test/java/org/elasticsearch/exponentialhistogram/ExponentialHistogramMergerTests.java index 9d53f8d477c5a..dbb26dca06fb0 100644 --- a/libs/exponential-histogram/src/test/java/org/elasticsearch/exponentialhistogram/ExponentialHistogramMergerTests.java +++ b/libs/exponential-histogram/src/test/java/org/elasticsearch/exponentialhistogram/ExponentialHistogramMergerTests.java @@ -15,7 +15,6 @@ import java.util.Arrays; import java.util.Collections; import java.util.List; -import java.util.Random; import java.util.stream.Collectors; import java.util.stream.IntStream; @@ -122,17 +121,15 @@ public void testUpscalingDoesNotExceedIndexLimits() { * Verify that the resulting histogram is independent of the order of elements and therefore merges performed. 
*/ public void testMergeOrderIndependence() { - Random rnd = new Random(42); - List values = IntStream.range(0, 10_000) - .mapToDouble(i -> i < 17 ? 0 : rnd.nextDouble() * Math.pow(10, rnd.nextLong() % 4)) + .mapToDouble(i -> i < 17 ? 0 : (-1 + 2 * randomDouble()) * Math.pow(10, randomIntBetween(-4, 4))) .boxed() .collect(Collectors.toCollection(ArrayList::new)); ExponentialHistogram reference = ExponentialHistogramGenerator.createFor(20, values.stream().mapToDouble(Double::doubleValue)); for (int i = 0; i < 100; i++) { - Collections.shuffle(values, rnd); + Collections.shuffle(values, random()); ExponentialHistogram shuffled = ExponentialHistogramGenerator.createFor(20, values.stream().mapToDouble(Double::doubleValue)); assertThat("Expected same scale", shuffled.scale(), equalTo(reference.scale())); diff --git a/libs/exponential-histogram/src/test/java/org/elasticsearch/exponentialhistogram/ExponentialScaleUtilsTests.java b/libs/exponential-histogram/src/test/java/org/elasticsearch/exponentialhistogram/ExponentialScaleUtilsTests.java index ade2bbd56c71f..2bacefe552fd0 100644 --- a/libs/exponential-histogram/src/test/java/org/elasticsearch/exponentialhistogram/ExponentialScaleUtilsTests.java +++ b/libs/exponential-histogram/src/test/java/org/elasticsearch/exponentialhistogram/ExponentialScaleUtilsTests.java @@ -16,7 +16,6 @@ import java.math.BigDecimal; import java.math.MathContext; import java.math.RoundingMode; -import java.util.Random; import static org.elasticsearch.exponentialhistogram.ExponentialHistogram.MAX_INDEX; import static org.elasticsearch.exponentialhistogram.ExponentialHistogram.MAX_INDEX_BITS; @@ -66,13 +65,11 @@ public void testExtremeValueIndexing() { } public void testRandomValueIndexing() { - Random rnd = new Random(42); - for (int i = 0; i < 100_000; i++) { // generate values in the range 10^-100 to 10^100 - double exponent = rnd.nextDouble() * 200 - 100; + double exponent = randomDouble() * 200 - 100; double testValue = Math.pow(10, exponent); 
- int scale = rnd.nextInt(MIN_SCALE / 2, MAX_SCALE / 2); + int scale = randomIntBetween(MIN_SCALE / 2, MAX_SCALE / 2); long index = computeIndex(testValue, scale); double lowerBound = getLowerBucketBoundary(index, scale); @@ -104,11 +101,10 @@ public void testRandomValueIndexing() { } public void testRandomIndicesScaleAdjustement() { - Random rnd = new Random(42); for (int i = 0; i < 100_000; i++) { - long index = rnd.nextLong(MAX_INDEX); - int currentScale = rnd.nextInt(MIN_SCALE, MAX_SCALE); + long index = randomLongBetween(MIN_INDEX, MAX_INDEX); + int currentScale = randomIntBetween(MIN_SCALE, MAX_SCALE); int maxAdjustment = Math.min(MAX_SCALE - currentScale, getMaximumScaleIncrease(index)); assertThat( @@ -120,7 +116,6 @@ public void testRandomIndicesScaleAdjustement() { assertThat(adjustScale(index, currentScale, maxAdjustment) * 2, greaterThan(MAX_INDEX)); } else if (index < 0) { assertThat(adjustScale(index, currentScale, maxAdjustment) * 2, lessThan(MIN_INDEX)); - } } } @@ -128,13 +123,12 @@ public void testRandomIndicesScaleAdjustement() { } public void testRandomBucketBoundaryComparison() { - Random rnd = new Random(42); for (int i = 0; i < 100_000; i++) { - long indexA = rnd.nextLong(MIN_INDEX, MAX_INDEX + 1); - long indexB = rnd.nextLong(MIN_INDEX, MAX_INDEX + 1); - int scaleA = rnd.nextInt(MIN_SCALE, MAX_SCALE + 1); - int scaleB = rnd.nextInt(MIN_SCALE, MAX_SCALE + 1); + long indexA = randomLongBetween(MIN_INDEX, MAX_INDEX); + long indexB = randomLongBetween(MIN_INDEX, MAX_INDEX); + int scaleA = randomIntBetween(MIN_SCALE, MAX_SCALE); + int scaleB = randomIntBetween(MIN_SCALE, MAX_SCALE); double lowerBoundA = getLowerBucketBoundary(indexA, scaleA); while (Double.isInfinite(lowerBoundA)) { @@ -157,20 +151,18 @@ public void testRandomBucketBoundaryComparison() { } public void testUpscalingAccuracy() { - Random rnd = new Random(42); - // Use slightly adjusted scales to not run into numeric trouble, because we don't use exact maths here int minScale = 
MIN_SCALE + 7; int maxScale = MAX_SCALE - 15; for (int i = 0; i < 10_000; i++) { - int startScale = rnd.nextInt(minScale, maxScale); - int scaleIncrease = rnd.nextInt(1, maxScale - startScale + 1); + int startScale = randomIntBetween(minScale, maxScale - 1); + int scaleIncrease = randomIntBetween(1, maxScale - startScale); - long index = MAX_INDEX >> scaleIncrease >> (int) (rnd.nextDouble() * (MAX_INDEX_BITS - scaleIncrease)); + long index = MAX_INDEX >> scaleIncrease >> (int) (randomDouble() * (MAX_INDEX_BITS - scaleIncrease)); index = Math.max(1, index); - index = (long) (rnd.nextDouble() * index) * (rnd.nextBoolean() ? 1 : -1); + index = (long) ((2 * randomDouble() - 1) * index); double midPoint = getPointOfLeastRelativeError(index, startScale); // limit the numeric range, otherwise we get rounding errors causing the test to fail diff --git a/libs/exponential-histogram/src/test/java/org/elasticsearch/exponentialhistogram/FixedCapacityExponentialHistogramTests.java b/libs/exponential-histogram/src/test/java/org/elasticsearch/exponentialhistogram/FixedCapacityExponentialHistogramTests.java new file mode 100644 index 0000000000000..b3a698b2ccd1a --- /dev/null +++ b/libs/exponential-histogram/src/test/java/org/elasticsearch/exponentialhistogram/FixedCapacityExponentialHistogramTests.java @@ -0,0 +1,48 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the "Elastic License + * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side + * Public License v 1"; you may not use this file except in compliance with, at + * your election, the "Elastic License 2.0", the "GNU Affero General Public + * License v3.0 only", or the "Server Side Public License, v 1". 
+ */ + +package org.elasticsearch.exponentialhistogram; + +import org.elasticsearch.test.ESTestCase; + +import static org.hamcrest.Matchers.equalTo; + +public class FixedCapacityExponentialHistogramTests extends ESTestCase { + + public void testValueCountUpdatedCorrectly() { + + FixedCapacityExponentialHistogram histogram = new FixedCapacityExponentialHistogram(100); + + assertThat(histogram.negativeBuckets().valueCount(), equalTo(0L)); + assertThat(histogram.positiveBuckets().valueCount(), equalTo(0L)); + + histogram.tryAddBucket(1, 10, false); + + assertThat(histogram.negativeBuckets().valueCount(), equalTo(10L)); + assertThat(histogram.positiveBuckets().valueCount(), equalTo(0L)); + + histogram.tryAddBucket(2, 3, false); + histogram.tryAddBucket(3, 4, false); + histogram.tryAddBucket(1, 5, true); + + assertThat(histogram.negativeBuckets().valueCount(), equalTo(17L)); + assertThat(histogram.positiveBuckets().valueCount(), equalTo(5L)); + + histogram.tryAddBucket(2, 3, true); + histogram.tryAddBucket(3, 4, true); + + assertThat(histogram.negativeBuckets().valueCount(), equalTo(17L)); + assertThat(histogram.positiveBuckets().valueCount(), equalTo(12L)); + + histogram.resetBuckets(0); + + assertThat(histogram.negativeBuckets().valueCount(), equalTo(0L)); + assertThat(histogram.positiveBuckets().valueCount(), equalTo(0L)); + } +} From 7dad0891a58c3ce7a38e6f5c182d71375f1496b9 Mon Sep 17 00:00:00 2001 From: Jonas Kunz Date: Fri, 18 Jul 2025 12:56:55 +0200 Subject: [PATCH 31/32] Fix javadoc and benchmarks --- .../ExponentialHistogramGenerationBench.java | 2 +- .../ExponentialHistogramMergeBench.java | 2 +- libs/exponential-histogram/build.gradle | 5 +++++ .../exponentialhistogram/BucketIterator.java | 2 +- .../exponentialhistogram/DownscaleStats.java | 6 ++++++ .../FixedCapacityExponentialHistogram.java | 9 +++++++-- 6 files changed, 21 insertions(+), 5 deletions(-) diff --git 
a/benchmarks/src/main/java/org/elasticsearch/benchmark/exponentialhistogram/ExponentialHistogramGenerationBench.java b/benchmarks/src/main/java/org/elasticsearch/benchmark/exponentialhistogram/ExponentialHistogramGenerationBench.java index f009aaf813eb0..954b627a40a67 100644 --- a/benchmarks/src/main/java/org/elasticsearch/benchmark/exponentialhistogram/ExponentialHistogramGenerationBench.java +++ b/benchmarks/src/main/java/org/elasticsearch/benchmark/exponentialhistogram/ExponentialHistogramGenerationBench.java @@ -61,7 +61,7 @@ public void setUp() { Supplier nextRandom = () -> distribution.equals("GAUSSIAN") ? random.nextGaussian() : random.nextDouble(); - // TODO: why is this here for T-DIGEST? + // Make sure that we start with a non-empty histogram, as this distorts initial additions for (int i = 0; i < 10000; ++i) { histoGenerator.add(nextRandom.get()); } diff --git a/benchmarks/src/main/java/org/elasticsearch/benchmark/exponentialhistogram/ExponentialHistogramMergeBench.java b/benchmarks/src/main/java/org/elasticsearch/benchmark/exponentialhistogram/ExponentialHistogramMergeBench.java index b82650d367e17..22e38b24cc886 100644 --- a/benchmarks/src/main/java/org/elasticsearch/benchmark/exponentialhistogram/ExponentialHistogramMergeBench.java +++ b/benchmarks/src/main/java/org/elasticsearch/benchmark/exponentialhistogram/ExponentialHistogramMergeBench.java @@ -87,7 +87,7 @@ public void setUp() { private static int getBucketCount(ExponentialHistogram histo) { int cnt = 0; - for (BucketIterator it : List.of(histo.negativeBuckets(), histo.positiveBuckets())) { + for (BucketIterator it : List.of(histo.negativeBuckets().iterator(), histo.positiveBuckets().iterator())) { while (it.hasNext()) { cnt++; it.advance(); diff --git a/libs/exponential-histogram/build.gradle b/libs/exponential-histogram/build.gradle index 7115c2086c890..6451931297597 100644 --- a/libs/exponential-histogram/build.gradle +++ b/libs/exponential-histogram/build.gradle @@ -16,3 +16,8 @@ 
dependencies { testImplementation('ch.obermuhlner:big-math:2.3.2') testImplementation('org.apache.commons:commons-math3:3.6.1') } + +tasks.named('forbiddenApisMain').configure { + // this lib does not depend on core, so only jdk signatures should be checked + replaceSignatureFiles 'jdk-signatures' +} diff --git a/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/BucketIterator.java b/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/BucketIterator.java index 1ea8df4b46322..5e6f833c47fe7 100644 --- a/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/BucketIterator.java +++ b/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/BucketIterator.java @@ -38,7 +38,7 @@ public interface BucketIterator { * The index of the bucket at the current iterator position. Does not advance the iterator. * Must not be called if {@link #hasNext()} returns {@code false}. * - * @return the index of the bucket, guaranteed to be in the range [{@link #MIN_INDEX}, {@link #MAX_INDEX}] + * @return the index of the bucket, guaranteed to be in the range [{@link ExponentialHistogram#MIN_INDEX}, {@link ExponentialHistogram#MAX_INDEX}] */ long peekIndex(); diff --git a/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/DownscaleStats.java b/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/DownscaleStats.java index 8578f11b3e3db..23eebbddd1d58 100644 --- a/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/DownscaleStats.java +++ b/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/DownscaleStats.java @@ -32,6 +32,12 @@ void reset() { Arrays.fill(collapsedBucketCount, 0); } + /** + * Adds a pair of neighboring bucket indices to track for potential merging. 
+ * + * @param previousBucketIndex the index of the previous bucket + * @param currentBucketIndex the index of the current bucket + */ void add(long previousBucketIndex, long currentBucketIndex) { if (currentBucketIndex <= previousBucketIndex) { throw new IllegalArgumentException("currentBucketIndex must be greater than previousBucketIndex"); diff --git a/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/FixedCapacityExponentialHistogram.java b/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/FixedCapacityExponentialHistogram.java index 813d52398c91e..9742c0669d729 100644 --- a/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/FixedCapacityExponentialHistogram.java +++ b/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/FixedCapacityExponentialHistogram.java @@ -66,6 +66,8 @@ public void reset() { /** * Removes all positive and negative buckets from this histogram and sets the scale to the given value. + * + * @param scale the scale to set for this histogram */ public void resetBuckets(int scale) { if (scale > MAX_SCALE || scale < MIN_SCALE) { @@ -83,7 +85,10 @@ public ZeroBucket zeroBucket() { /** * Replaces the zero bucket of this histogram with the given one. - * Callers must ensure that the given {@link ZeroBucket} does not overlap with any of the positive or negative buckets of this histogram. + * Callers must ensure that the given {@link ZeroBucket} does not + * overlap with any of the positive or negative buckets of this histogram. + * + * @param zeroBucket the zero bucket to set */ public void setZeroBucket(ZeroBucket zeroBucket) { this.zeroBucket = zeroBucket; @@ -97,7 +102,7 @@ public void setZeroBucket(ZeroBucket zeroBucket) { *
  • All buckets from the negative range must be provided before the first one from the positive range.
  • *
  • For both the negative and positive ranges, buckets must be provided in ascending index order.
  • *
  • It is not allowed to provide the same bucket more than once.
  • - *
  • It is not allowed to add empty buckets (count <= 0).
  • + *
  • It is not allowed to add empty buckets ({@code count <= 0}).
  • *
* * If any of these rules are violated, this call will fail with an exception. From 522a72a51edf91d14edf7bae7695c5a10072b5b7 Mon Sep 17 00:00:00 2001 From: Jonas Kunz Date: Fri, 18 Jul 2025 14:04:40 +0200 Subject: [PATCH 32/32] Checkstyle --- .../elasticsearch/exponentialhistogram/BucketIterator.java | 4 +++- .../exponentialhistogram/ExponentialHistogramMerger.java | 3 ++- .../exponentialhistogram/ExponentialScaleUtils.java | 3 ++- .../org/elasticsearch/exponentialhistogram/ZeroBucket.java | 3 ++- 4 files changed, 9 insertions(+), 4 deletions(-) diff --git a/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/BucketIterator.java b/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/BucketIterator.java index 5e6f833c47fe7..f581e00a0bbc3 100644 --- a/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/BucketIterator.java +++ b/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/BucketIterator.java @@ -38,7 +38,9 @@ public interface BucketIterator { * The index of the bucket at the current iterator position. Does not advance the iterator. * Must not be called if {@link #hasNext()} returns {@code false}. 
* - * @return the index of the bucket, guaranteed to be in the range [{@link ExponentialHistogram#MIN_INDEX}, {@link ExponentialHistogram#MAX_INDEX}] + * @return the index of the bucket, guaranteed to be in the range + * [{@link ExponentialHistogram#MIN_INDEX}, + * {@link ExponentialHistogram#MAX_INDEX}] */ long peekIndex(); diff --git a/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ExponentialHistogramMerger.java b/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ExponentialHistogramMerger.java index e0a50d81bd549..3a50447bba971 100644 --- a/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ExponentialHistogramMerger.java +++ b/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ExponentialHistogramMerger.java @@ -12,7 +12,8 @@ import static org.elasticsearch.exponentialhistogram.ExponentialScaleUtils.getMaximumScaleIncrease; /** - * Allows accumulating multiple {@link ExponentialHistogram} into a single one while keeping the bucket count in the result below a given limit. + * Allows accumulating multiple {@link ExponentialHistogram} into a single one + * while keeping the bucket count in the result below a given limit. 
*/ public class ExponentialHistogramMerger { diff --git a/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ExponentialScaleUtils.java b/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ExponentialScaleUtils.java index 27429c9af474d..e7e00b09d324f 100644 --- a/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ExponentialScaleUtils.java +++ b/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ExponentialScaleUtils.java @@ -130,7 +130,8 @@ static long adjustScale(long index, int currentScale, int scaleAdjustment) { * @param scaleA the scale of the first bucket * @param idxB the index of the second bucket * @param scaleB the scale of the second bucket - * @return a negative integer, zero, or a positive integer as the first bucket's lower boundary is less than, equal to, or greater than the second bucket's lower boundary + * @return a negative integer, zero, or a positive integer as the first bucket's lower boundary is + * less than, equal to, or greater than the second bucket's lower boundary */ public static int compareExponentiallyScaledValues(long idxA, int scaleA, long idxB, int scaleB) { if (scaleA > scaleB) { diff --git a/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ZeroBucket.java b/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ZeroBucket.java index 9379bb4a1e845..ef68b7599731e 100644 --- a/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ZeroBucket.java +++ b/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ZeroBucket.java @@ -107,7 +107,8 @@ public ZeroBucket collapseOverlappingBuckets(BucketIterator... bucketIterators) * Compares the zero threshold of this bucket with another one. * * @param other The other zero bucket to compare against. 
- * @return A negative integer, zero, or a positive integer if this bucket's threshold is less than, equal to, or greater than the other's. + * @return A negative integer, zero, or a positive integer if this bucket's threshold is less than, + * equal to, or greater than the other's. */ public int compareZeroThreshold(ZeroBucket other) { return compareExponentiallyScaledValues(index, scale, other.index, other.scale);