Skip to content

Commit b2c1f0b

Browse files
committed
Cache preset dict in LZ4WithPresetDictDecompressor
1 parent a7ff87f commit b2c1f0b

File tree

5 files changed

+30
-11
lines changed

5 files changed

+30
-11
lines changed

lucene/CHANGES.txt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -204,6 +204,8 @@ Optimizations
204204

205205
* GITHUB#14304: Add SIMD optimizations for scalar quantized queries and indexing. (Simon Cooper)
206206

207+
* GITHUB#14397: Cache the decompressed preset dictionary in LZ4WithPresetDictDecompressor and reuse it across decompress calls until reset() is called. (kkewwei)
208+
207209
Bug Fixes
208210
---------------------
209211

lucene/core/src/java/org/apache/lucene/codecs/compressing/Decompressor.java

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,4 +44,6 @@ public abstract void decompress(
4444

4545
@Override
4646
public abstract Decompressor clone();
47+
48+
public void reset() {}
4749
}

lucene/core/src/java/org/apache/lucene/codecs/lucene90/LZ4WithPresetDictCompressionMode.java

Lines changed: 21 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,7 @@ private static final class LZ4WithPresetDictDecompressor extends Decompressor {
6464

6565
private int[] compressedLengths;
6666
private byte[] buffer;
67+
private boolean reused = false;
6768

6869
LZ4WithPresetDictDecompressor() {
6970
compressedLengths = new int[0];
@@ -72,16 +73,15 @@ private static final class LZ4WithPresetDictDecompressor extends Decompressor {
7273

7374
private int readCompressedLengths(
7475
DataInput in, int originalLength, int dictLength, int blockLength) throws IOException {
75-
in.readVInt(); // compressed length of the dictionary, unused
76-
int totalLength = dictLength;
76+
compressedLengths = ArrayUtil.growNoCopy(compressedLengths, originalLength / blockLength + 2);
7777
int i = 0;
78-
compressedLengths = ArrayUtil.growNoCopy(compressedLengths, originalLength / blockLength + 1);
78+
compressedLengths[i++] = in.readVInt(); // compressed length of the dictionary
79+
int totalLength = dictLength;
7980
while (totalLength < originalLength) {
80-
8181
compressedLengths[i++] = in.readVInt();
8282
totalLength += blockLength;
8383
}
84-
return i;
84+
return i - 1;
8585
}
8686

8787
@Override
@@ -98,12 +98,17 @@ public void decompress(DataInput in, int originalLength, int offset, int length,
9898
final int blockLength = in.readVInt();
9999

100100
final int numBlocks = readCompressedLengths(in, originalLength, dictLength, blockLength);
101-
102-
buffer = ArrayUtil.growNoCopy(buffer, dictLength + blockLength);
103101
bytes.length = 0;
104-
// Read the dictionary
105-
if (LZ4.decompress(in, dictLength, buffer, 0) != dictLength) {
106-
throw new CorruptIndexException("Illegal dict length", in);
102+
if (reused) {
103+
assert buffer.length >= dictLength + blockLength;
104+
in.skipBytes(compressedLengths[0]);
105+
} else {
106+
// Read the dictionary
107+
buffer = ArrayUtil.growNoCopy(buffer, dictLength + blockLength);
108+
if (LZ4.decompress(in, dictLength, buffer, 0) != dictLength) {
109+
throw new CorruptIndexException("Illegal dict length", in);
110+
}
111+
reused = true;
107112
}
108113

109114
int offsetInBlock = dictLength;
@@ -114,7 +119,7 @@ public void decompress(DataInput in, int originalLength, int offset, int length,
114119
// Skip unneeded blocks
115120
int numBytesToSkip = 0;
116121
for (int i = 0; i < numBlocks && offsetInBlock + blockLength < offset; ++i) {
117-
int compressedBlockLength = compressedLengths[i];
122+
int compressedBlockLength = compressedLengths[i + 1];
118123
numBytesToSkip += compressedBlockLength;
119124
offsetInBlock += blockLength;
120125
offsetInBytesRef -= blockLength;
@@ -148,6 +153,11 @@ public void decompress(DataInput in, int originalLength, int offset, int length,
148153
public Decompressor clone() {
149154
return new LZ4WithPresetDictDecompressor();
150155
}
156+
157+
@Override
158+
public void reset() {
159+
reused = false;
160+
}
151161
}
152162

153163
private static class LZ4WithPresetDictCompressor extends Compressor {

lucene/core/src/java/org/apache/lucene/codecs/lucene90/compressing/Lucene90CompressingStoredFieldsReader.java

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -512,6 +512,7 @@ private void doReset(int docID) throws IOException {
512512
bytes.offset = bytes.length = 0;
513513
for (int decompressed = 0; decompressed < totalLength; ) {
514514
final int toDecompress = Math.min(totalLength - decompressed, chunkSize);
515+
decompressor.reset();
515516
decompressor.decompress(fieldsStream, toDecompress, 0, toDecompress, spare);
516517
bytes.bytes = ArrayUtil.grow(bytes.bytes, bytes.length + spare.length);
517518
System.arraycopy(spare.bytes, spare.offset, bytes.bytes, bytes.length, spare.length);
@@ -560,6 +561,7 @@ SerializedDocument document(int docID) throws IOException {
560561
documentInput = new ByteArrayDataInput(bytes.bytes, bytes.offset + offset, length);
561562
} else if (sliced) {
562563
fieldsStream.seek(startPointer);
564+
decompressor.reset();
563565
decompressor.decompress(
564566
fieldsStream, chunkSize, offset, Math.min(length, chunkSize - offset), bytes);
565567
documentInput =
@@ -573,6 +575,7 @@ void fillBuffer() throws IOException {
573575
throw new EOFException();
574576
}
575577
final int toDecompress = Math.min(length - decompressed, chunkSize);
578+
decompressor.reset();
576579
decompressor.decompress(fieldsStream, toDecompress, 0, toDecompress, bytes);
577580
decompressed += toDecompress;
578581
}
@@ -644,6 +647,7 @@ SerializedDocument serializedDocument(int docID) throws IOException {
644647
if (state.contains(docID) == false) {
645648
fieldsStream.seek(indexReader.getStartPointer(docID));
646649
state.reset(docID);
650+
decompressor.reset();
647651
}
648652
assert state.contains(docID);
649653
return state.document(docID);

lucene/core/src/java/org/apache/lucene/codecs/lucene90/compressing/Lucene90CompressingTermVectorsReader.java

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -368,6 +368,7 @@ public Fields get(int doc) throws IOException {
368368
startPointer = blockState.startPointer; // avoid searching the start pointer
369369
} else {
370370
startPointer = indexReader.getStartPointer(doc);
371+
decompressor.reset();
371372
}
372373
vectorsStream.seek(startPointer);
373374

0 commit comments

Comments
 (0)