Skip to content

Commit 44f9b88

Browse files
Fix match_only_text keyword multi-field bug (#131383) (#131467)
In #131314 we fixed match_only_text fields with ignore_above keyword multi-fields in the case that the keyword multi-field is stored. However, the issue is still present if the keyword field is not stored, but instead has doc values. This patch fixes that case.
1 parent 6980096 commit 44f9b88

File tree

2 files changed

+145
-9
lines changed

2 files changed

+145
-9
lines changed

modules/mapper-extras/src/main/java/org/elasticsearch/index/mapper/extras/MatchOnlyTextFieldMapper.java

Lines changed: 57 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -67,7 +67,6 @@
6767
import java.io.IOException;
6868
import java.io.UncheckedIOException;
6969
import java.util.ArrayList;
70-
import java.util.Arrays;
7170
import java.util.Collections;
7271
import java.util.List;
7372
import java.util.Map;
@@ -253,11 +252,18 @@ private IOFunction<LeafReaderContext, CheckedIntFunction<List<Object>, IOExcepti
253252
if (searchExecutionContext.isSourceSynthetic() && withinMultiField) {
254253
String parentField = searchExecutionContext.parentPath(name());
255254
var parent = searchExecutionContext.lookup().fieldType(parentField);
256-
if (parent.isStored()) {
257-
if (parent instanceof KeywordFieldMapper.KeywordFieldType keywordParent
258-
&& keywordParent.ignoreAbove() != Integer.MAX_VALUE) {
255+
256+
if (parent instanceof KeywordFieldMapper.KeywordFieldType keywordParent
257+
&& keywordParent.ignoreAbove() != Integer.MAX_VALUE) {
258+
if (parent.isStored()) {
259259
return storedFieldFetcher(parentField, keywordParent.originalName());
260+
} else if (parent.hasDocValues()) {
261+
var ifd = searchExecutionContext.getForField(parent, MappedFieldType.FielddataOperation.SEARCH);
262+
return combineFieldFetchers(docValuesFieldFetcher(ifd), storedFieldFetcher(keywordParent.originalName()));
260263
}
264+
}
265+
266+
if (parent.isStored()) {
261267
return storedFieldFetcher(parentField);
262268
} else if (parent.hasDocValues()) {
263269
var ifd = searchExecutionContext.getForField(parent, MappedFieldType.FielddataOperation.SEARCH);
@@ -268,14 +274,21 @@ private IOFunction<LeafReaderContext, CheckedIntFunction<List<Object>, IOExcepti
268274
} else if (searchExecutionContext.isSourceSynthetic() && hasCompatibleMultiFields) {
269275
var mapper = (MatchOnlyTextFieldMapper) searchExecutionContext.getMappingLookup().getMapper(name());
270276
var kwd = TextFieldMapper.SyntheticSourceHelper.getKeywordFieldMapperForSyntheticSource(mapper);
277+
271278
if (kwd != null) {
272279
var fieldType = kwd.fieldType();
273-
if (fieldType.isStored()) {
274-
if (fieldType.ignoreAbove() != Integer.MAX_VALUE) {
280+
281+
if (fieldType.ignoreAbove() != Integer.MAX_VALUE) {
282+
if (fieldType.isStored()) {
275283
return storedFieldFetcher(fieldType.name(), fieldType.originalName());
276-
} else {
277-
return storedFieldFetcher(fieldType.name());
284+
} else if (fieldType.hasDocValues()) {
285+
var ifd = searchExecutionContext.getForField(fieldType, MappedFieldType.FielddataOperation.SEARCH);
286+
return combineFieldFetchers(docValuesFieldFetcher(ifd), storedFieldFetcher(fieldType.originalName()));
278287
}
288+
}
289+
290+
if (fieldType.isStored()) {
291+
return storedFieldFetcher(fieldType.name());
279292
} else if (fieldType.hasDocValues()) {
280293
var ifd = searchExecutionContext.getForField(fieldType, MappedFieldType.FielddataOperation.SEARCH);
281294
return docValuesFieldFetcher(ifd);
@@ -332,7 +345,42 @@ private static IOFunction<LeafReaderContext, CheckedIntFunction<List<Object>, IO
332345
if (names.length == 1) {
333346
return storedFields.get(names[0]);
334347
}
335-
return Arrays.stream(names).map(storedFields::get).filter(Objects::nonNull).flatMap(List::stream).toList();
348+
349+
List<Object> values = new ArrayList<>();
350+
for (var name : names) {
351+
var currValues = storedFields.get(name);
352+
if (currValues != null) {
353+
values.addAll(currValues);
354+
}
355+
}
356+
357+
return values;
358+
};
359+
};
360+
}
361+
362+
/**
 * Builds a fetcher that merges the per-document values produced by two other fetchers.
 * <p>
 * For each leaf context, a getter is obtained from both fetchers. For each doc id the returned
 * getter produces a new list holding the primary getter's values followed by the secondary
 * getter's values; a {@code null} result from either getter is skipped.
 *
 * @param primaryFetcher   fetcher whose values are placed first in the combined list
 * @param secondaryFetcher fetcher whose values are appended after the primary values
 * @return a fetcher returning the concatenation of both fetchers' values for a document
 */
private static IOFunction<LeafReaderContext, CheckedIntFunction<List<Object>, IOException>> combineFieldFetchers(
363+
IOFunction<LeafReaderContext, CheckedIntFunction<List<Object>, IOException>> primaryFetcher,
364+
IOFunction<LeafReaderContext, CheckedIntFunction<List<Object>, IOException>> secondaryFetcher
365+
) {
366+
return context -> {
367+
// Resolve both per-leaf getters once, outside the per-document lambda.
var primaryGetter = primaryFetcher.apply(context);
368+
var secondaryGetter = secondaryFetcher.apply(context);
369+
return docId -> {
370+
List<Object> values = new ArrayList<>();
371+
var primary = primaryGetter.apply(docId);
372+
if (primary != null) {
373+
values.addAll(primary);
374+
}
375+
376+
var secondary = secondaryGetter.apply(docId);
377+
if (secondary != null) {
378+
values.addAll(secondary);
379+
}
380+
381+
// At least one of the two getters is expected to return a non-null list for the document.
// NOTE(review): this only fires when assertions are enabled; otherwise an empty list is returned.
assert primary != null || secondary != null;
382+
383+
return values;
336384
};
337385
};
338386
}

modules/mapper-extras/src/yamlRestTest/resources/rest-api-spec/test/match_only_text/10_basic.yml

Lines changed: 88 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -435,6 +435,50 @@ synthetic_source match_only_text as multi-field:
435435
- match:
436436
hits.hits.0._source.foo: "Apache Lucene powers Elasticsearch"
437437

438+
---
439+
synthetic_source match_only_text as multi-field with ignored keyword as parent:
440+
- requires:
441+
cluster_features: [ "mapper.source.mode_from_index_setting" ]
442+
reason: "Source mode configured through index setting"
443+
444+
- do:
445+
indices.create:
446+
index: synthetic_source_test
447+
body:
448+
settings:
449+
index:
450+
mapping.source.mode: synthetic
451+
mappings:
452+
properties:
453+
foo:
454+
type: keyword
455+
store: false
456+
doc_values: true
457+
ignore_above: 10
458+
fields:
459+
text:
460+
type: match_only_text
461+
462+
- do:
463+
index:
464+
index: synthetic_source_test
465+
id: "1"
466+
refresh: true
467+
body:
468+
foo: [ "Apache Lucene powers Elasticsearch", "Apache" ]
469+
470+
- do:
471+
search:
472+
index: synthetic_source_test
473+
body:
474+
query:
475+
match_phrase:
476+
foo.text: apache lucene
477+
478+
- match: { "hits.total.value": 1 }
479+
- match:
480+
hits.hits.0._source.foo: [ "Apache", "Apache Lucene powers Elasticsearch" ]
481+
438482
---
439483
synthetic_source match_only_text as multi-field with stored keyword as parent:
440484
- requires:
@@ -562,6 +606,50 @@ synthetic_source match_only_text with multi-field:
562606
- match:
563607
hits.hits.0._source.foo: "Apache Lucene powers Elasticsearch"
564608

609+
---
610+
synthetic_source match_only_text with ignored multi-field:
611+
- requires:
612+
cluster_features: [ "mapper.source.mode_from_index_setting" ]
613+
reason: "Source mode configured through index setting"
614+
615+
- do:
616+
indices.create:
617+
index: synthetic_source_test
618+
body:
619+
settings:
620+
index:
621+
mapping.source.mode: synthetic
622+
mappings:
623+
properties:
624+
foo:
625+
type: match_only_text
626+
fields:
627+
raw:
628+
type: keyword
629+
store: false
630+
doc_values: true
631+
ignore_above: 10
632+
633+
- do:
634+
index:
635+
index: synthetic_source_test
636+
id: "1"
637+
refresh: true
638+
body:
639+
foo: "Apache Lucene powers Elasticsearch"
640+
641+
- do:
642+
search:
643+
index: synthetic_source_test
644+
body:
645+
query:
646+
match_phrase:
647+
foo: apache lucene
648+
649+
- match: { "hits.total.value": 1 }
650+
- match:
651+
hits.hits.0._source.foo: "Apache Lucene powers Elasticsearch"
652+
565653
---
566654
synthetic_source match_only_text with stored multi-field:
567655
- requires:

0 commit comments

Comments
 (0)