Skip to content

Don't normalize fields of type text when the index mode is LogsDB or TSDB #131317

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 13 commits into
base: main
Choose a base branch
from
Open
16 changes: 16 additions & 0 deletions docs/changelog/131317.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
pr: 131317
summary: Don't enable norms for fields of type text when the index mode is LogsDB or TSDB
area: Mapping
type: breaking
issues: []
breaking:
title: Don't enable norms for fields of type text when the index mode is LogsDB or TSDB
area: Mapping
details: "This changes the default behavior for norms on `text` fields in logsdb\
\ and tsdb indices. Prior to this change, norms were enabled by default, with\
\ the option to disable them via manual configurations. After this change, norms\
\ will be disabled by default. Note, because we dont support enabling norms from\
\ a disabled state, users will not be able to enable norms on `text` fields in\
\ logsdb and tsdb indices."
impact: Text fields will no longer be normalized by default in LogsDB and TSDB indicies.
notable: false
Original file line number Diff line number Diff line change
Expand Up @@ -138,7 +138,7 @@ public static class Builder extends FieldMapper.Builder {
final Parameter<SimilarityProvider> similarity = TextParams.similarity(m -> builder(m).similarity.get());

final Parameter<String> indexOptions = TextParams.textIndexOptions(m -> builder(m).indexOptions.get());
final Parameter<Boolean> norms = TextParams.norms(true, m -> builder(m).norms.get());
final Parameter<Boolean> norms = Parameter.normsParam(m -> builder(m).norms.get(), true);
final Parameter<String> termVectors = TextParams.termVectors(m -> builder(m).termVectors.get());

private final Parameter<Map<String, String>> meta = Parameter.metaParam();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -226,7 +226,7 @@ public static class Builder extends FieldMapper.Builder {
final Parameter<Boolean> stored = Parameter.storeParam(m -> toType(m).fieldType.stored(), false);

final Parameter<String> indexOptions = TextParams.keywordIndexOptions(m -> toType(m).indexOptions);
final Parameter<Boolean> hasNorms = TextParams.norms(false, m -> toType(m).fieldType.omitNorms() == false);
final Parameter<Boolean> hasNorms = Parameter.normsParam(m -> toType(m).fieldType.omitNorms() == false, false);

final Parameter<Map<String, String>> meta = Parameter.metaParam();

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,7 @@ public static class Builder extends FieldMapper.Builder {

final Parameter<SimilarityProvider> similarity = TextParams.similarity(m -> builder(m).similarity.getValue());
final Parameter<String> indexOptions = TextParams.textIndexOptions(m -> builder(m).indexOptions.getValue());
final Parameter<Boolean> norms = TextParams.norms(true, m -> builder(m).norms.getValue());
final Parameter<Boolean> norms = Parameter.normsParam(m -> builder(m).norms.getValue(), true);
final Parameter<String> termVectors = TextParams.termVectors(m -> builder(m).termVectors.getValue());

private final Parameter<Map<String, String>> meta = Parameter.metaParam();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1325,6 +1325,18 @@ public static Parameter<Boolean> docValuesParam(Function<FieldMapper, Boolean> i
return Parameter.boolParam("doc_values", false, initializer, defaultValue);
}

public static Parameter<Boolean> normsParam(Function<FieldMapper, Boolean> initializer, boolean defaultValue) {
// norms can be updated from 'true' to 'false' but not vice-versa
return Parameter.boolParam("norms", true, initializer, defaultValue)
.setMergeValidator((prev, curr, c) -> prev == curr || (prev && curr == false));
}

public static Parameter<Boolean> normsParam(Function<FieldMapper, Boolean> initializer, Supplier<Boolean> defaultValueSupplier) {
// norms can be updated from 'true' to 'false' but not vice-versa
return Parameter.boolParam("norms", true, initializer, defaultValueSupplier)
.setMergeValidator((prev, curr, c) -> prev == curr || (prev && curr == false));
}

/**
* Defines a script parameter
* @param initializer retrieves the equivalent parameter from an existing FieldMapper for use in merges
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -183,7 +183,7 @@ public static final class Builder extends FieldMapper.DimensionBuilder {
private final IndexSortConfig indexSortConfig;
private final IndexMode indexMode;
private final Parameter<String> indexOptions = TextParams.keywordIndexOptions(m -> toType(m).indexOptions);
private final Parameter<Boolean> hasNorms = TextParams.norms(false, m -> toType(m).fieldType.omitNorms() == false);
private final Parameter<Boolean> hasNorms = Parameter.normsParam(m -> toType(m).fieldType.omitNorms() == false, false);
private final Parameter<SimilarityProvider> similarity = TextParams.similarity(
m -> toType(m).fieldType().getTextSearchInfo().similarity()
);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@
import org.elasticsearch.common.unit.Fuzziness;
import org.elasticsearch.common.xcontent.support.XContentMapValues;
import org.elasticsearch.core.Nullable;
import org.elasticsearch.index.IndexMode;
import org.elasticsearch.index.IndexVersion;
import org.elasticsearch.index.IndexVersions;
import org.elasticsearch.index.analysis.AnalyzerScope;
Expand Down Expand Up @@ -239,15 +240,17 @@ public static class Builder extends FieldMapper.Builder {

private final IndexVersion indexCreatedVersion;
private final Parameter<Boolean> store;
private final Parameter<Boolean> norms;

private final boolean isSyntheticSourceEnabled;

private final IndexMode indexMode;

private final Parameter<Boolean> index = Parameter.indexParam(m -> ((TextFieldMapper) m).index, true);

final Parameter<SimilarityProvider> similarity = TextParams.similarity(m -> ((TextFieldMapper) m).similarity);

final Parameter<String> indexOptions = TextParams.textIndexOptions(m -> ((TextFieldMapper) m).indexOptions);
final Parameter<Boolean> norms = TextParams.norms(true, m -> ((TextFieldMapper) m).norms);
final Parameter<String> termVectors = TextParams.termVectors(m -> ((TextFieldMapper) m).termVectors);

final Parameter<Boolean> fieldData = Parameter.boolParam("fielddata", true, m -> ((TextFieldMapper) m).fieldData, false);
Expand Down Expand Up @@ -290,26 +293,37 @@ public static class Builder extends FieldMapper.Builder {
private final boolean withinMultiField;

public Builder(String name, IndexAnalyzers indexAnalyzers, boolean isSyntheticSourceEnabled) {
this(name, IndexVersion.current(), indexAnalyzers, isSyntheticSourceEnabled, false);
this(name, IndexVersion.current(), null, indexAnalyzers, isSyntheticSourceEnabled, false);
}

public Builder(
String name,
IndexVersion indexCreatedVersion,
IndexMode indexMode,
IndexAnalyzers indexAnalyzers,
boolean isSyntheticSourceEnabled,
boolean withinMultiField
) {
super(name);

this.indexCreatedVersion = indexCreatedVersion;
this.indexMode = indexMode;
this.isSyntheticSourceEnabled = isSyntheticSourceEnabled;
this.withinMultiField = withinMultiField;

// don't enable norms by default if the index is LOGSDB or TSDB based
this.norms = Parameter.normsParam(
m -> ((TextFieldMapper) m).norms,
() -> indexMode != IndexMode.LOGSDB && indexMode != IndexMode.TIME_SERIES
);

// If synthetic source is used we need to either store this field
// to recreate the source or use keyword multi-fields for that.
// So if there are no suitable multi-fields we will default to
// storing the field without requiring users to explicitly set 'store'.
//
// If 'store' parameter was explicitly provided we'll reject the request.
// Note that if current builder is a multi field, then we don't need to store, given that responsibility lies with parent field
this.withinMultiField = withinMultiField;
this.store = Parameter.storeParam(m -> ((TextFieldMapper) m).store, () -> {
if (multiFieldsNotStoredByDefaultIndexVersionCheck(indexCreatedVersion)) {
return isSyntheticSourceEnabled
Expand All @@ -319,14 +333,12 @@ public Builder(
return isSyntheticSourceEnabled && multiFieldsBuilder.hasSyntheticSourceCompatibleKeywordField() == false;
}
});
this.indexCreatedVersion = indexCreatedVersion;
this.analyzers = new TextParams.Analyzers(
indexAnalyzers,
m -> ((TextFieldMapper) m).indexAnalyzer,
m -> (((TextFieldMapper) m).positionIncrementGap),
indexCreatedVersion
);
this.isSyntheticSourceEnabled = isSyntheticSourceEnabled;
}

public static boolean multiFieldsNotStoredByDefaultIndexVersionCheck(IndexVersion indexCreatedVersion) {
Expand Down Expand Up @@ -508,6 +520,7 @@ public TextFieldMapper build(MapperBuilderContext context) {
(n, c) -> new Builder(
n,
c.indexVersionCreated(),
c.getIndexSettings().getMode(),
c.getIndexAnalyzers(),
SourceFieldMapper.isSynthetic(c.getIndexSettings()),
c.isWithinMultiField()
Expand Down Expand Up @@ -1319,6 +1332,7 @@ public Query existsQuery(SearchExecutionContext context) {
}

private final IndexVersion indexCreatedVersion;
private final IndexMode indexMode;
private final boolean index;
private final boolean store;
private final String indexOptions;
Expand Down Expand Up @@ -1357,6 +1371,7 @@ private TextFieldMapper(
this.prefixFieldInfo = prefixFieldInfo;
this.phraseFieldInfo = phraseFieldInfo;
this.indexCreatedVersion = builder.indexCreatedVersion;
this.indexMode = builder.indexMode;
this.indexAnalyzer = builder.analyzers.getIndexAnalyzer();
this.indexAnalyzers = builder.analyzers.indexAnalyzers;
this.positionIncrementGap = builder.analyzers.positionIncrementGap.getValue();
Expand Down Expand Up @@ -1394,7 +1409,9 @@ public Map<String, NamedAnalyzer> indexAnalyzers() {

@Override
public FieldMapper.Builder getMergeBuilder() {
return new Builder(leafName(), indexCreatedVersion, indexAnalyzers, isSyntheticSourceEnabled, isWithinMultiField).init(this);
return new Builder(leafName(), indexCreatedVersion, indexMode, indexAnalyzers, isSyntheticSourceEnabled, isWithinMultiField).init(
this
);
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -123,11 +123,6 @@ private NamedAnalyzer wrapAnalyzer(NamedAnalyzer a) {
}
}

public static Parameter<Boolean> norms(boolean defaultValue, Function<FieldMapper, Boolean> initializer) {
// norms can be updated from 'true' to 'false' but not vv
return Parameter.boolParam("norms", true, initializer, defaultValue).setMergeValidator((o, n, c) -> o == n || (o && n == false));
}

public static Parameter<SimilarityProvider> similarity(Function<FieldMapper, SimilarityProvider> init) {
return new Parameter<>(
"similarity",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@
import org.elasticsearch.cluster.metadata.IndexMetadata;
import org.elasticsearch.common.Strings;
import org.elasticsearch.common.lucene.search.MultiPhrasePrefixQuery;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.index.IndexMode;
import org.elasticsearch.index.IndexSettings;
import org.elasticsearch.index.IndexVersion;
Expand Down Expand Up @@ -79,6 +80,8 @@
import org.junit.AssumptionViolatedException;

import java.io.IOException;
import java.time.Instant;
import java.time.temporal.ChronoUnit;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
Expand Down Expand Up @@ -1432,4 +1435,143 @@ public void testEmpty() throws Exception {
assertFalse(dv.advanceExact(3));
});
}

public void testNormalizeByDefault() throws IOException {
// given
Settings.Builder indexSettingsBuilder = getIndexSettingsBuilder();
indexSettingsBuilder.put(IndexSettings.MODE.getKey(), IndexMode.STANDARD.getName());
Settings indexSettings = indexSettingsBuilder.build();

XContentBuilder mapping = mapping(b -> {
b.startObject("potato");
b.field("type", "text");
b.endObject();
});

var source = source(b -> b.field("potato", "a potato flew around my room"));

// when
DocumentMapper mapper = createMapperService(indexSettings, mapping).documentMapper();
ParsedDocument doc = mapper.parse(source);

List<IndexableField> fields = doc.rootDoc().getFields("potato");
IndexableFieldType fieldType = fields.get(0).fieldType();

// then
assertThat(fieldType.omitNorms(), is(false));
}

public void testNormalizeWhenIndexModeIsNotGiven() throws IOException {
// given
Settings.Builder indexSettingsBuilder = getIndexSettingsBuilder();
Settings indexSettings = indexSettingsBuilder.build();

XContentBuilder mapping = mapping(b -> {
b.startObject("potato");
b.field("type", "text");
b.endObject();
});

var source = source(b -> b.field("potato", "a potato flew around my room"));

// when
DocumentMapper mapper = createMapperService(indexSettings, mapping).documentMapper();
ParsedDocument doc = mapper.parse(source);

List<IndexableField> fields = doc.rootDoc().getFields("potato");
IndexableFieldType fieldType = fields.get(0).fieldType();

// then
assertThat(fieldType.omitNorms(), is(false));
}

public void testNormalizeWhenIndexModeIsNull() throws IOException {
// given
Settings.Builder indexSettingsBuilder = getIndexSettingsBuilder();
indexSettingsBuilder.put(IndexSettings.MODE.getKey(), (String) null);
Settings indexSettings = indexSettingsBuilder.build();

XContentBuilder mapping = mapping(b -> {
b.startObject("potato");
b.field("type", "text");
b.endObject();
});

var source = source(b -> b.field("potato", "a potato flew around my room"));

// when
DocumentMapper mapper = createMapperService(indexSettings, mapping).documentMapper();
ParsedDocument doc = mapper.parse(source);

List<IndexableField> fields = doc.rootDoc().getFields("potato");
IndexableFieldType fieldType = fields.get(0).fieldType();

// then
assertThat(fieldType.omitNorms(), is(false));
}

public void testDontNormalizeWhenIndexModeIsLogsDB() throws IOException {
// given
Settings.Builder indexSettingsBuilder = getIndexSettingsBuilder();
indexSettingsBuilder.put(IndexSettings.MODE.getKey(), IndexMode.LOGSDB.getName());
Settings indexSettings = indexSettingsBuilder.build();

XContentBuilder mapping = mapping(b -> {
b.startObject("potato");
b.field("type", "text");
b.endObject();
});

var source = source(b -> {
b.field("@timestamp", Instant.now());
b.field("potato", "a potato flew around my room");
});

// when
DocumentMapper mapper = createMapperService(indexSettings, mapping).documentMapper();
ParsedDocument doc = mapper.parse(source);

List<IndexableField> fields = doc.rootDoc().getFields("potato");
IndexableFieldType fieldType = fields.get(0).fieldType();

// then
assertThat(fieldType.omitNorms(), is(true));
}

public void testDontNormalizeWhenIndexModeIsTSDB() throws IOException {
// given
Instant currentTime = Instant.now();
Settings.Builder indexSettingsBuilder = getIndexSettingsBuilder();
indexSettingsBuilder.put(IndexSettings.MODE.getKey(), IndexMode.TIME_SERIES.getName())
.put(IndexSettings.TIME_SERIES_START_TIME.getKey(), currentTime.minus(1, ChronoUnit.HOURS).toEpochMilli())
.put(IndexSettings.TIME_SERIES_END_TIME.getKey(), currentTime.plus(1, ChronoUnit.HOURS).toEpochMilli())
.put(IndexMetadata.INDEX_ROUTING_PATH.getKey(), "dimension");
Settings indexSettings = indexSettingsBuilder.build();

XContentBuilder mapping = mapping(b -> {
b.startObject("potato");
b.field("type", "text");
b.endObject();

b.startObject("@timestamp");
b.field("type", "date");
b.endObject();
});

var source = source(TimeSeriesRoutingHashFieldMapper.DUMMY_ENCODED_VALUE, b -> {
b.field("@timestamp", Instant.now());
b.field("potato", "a potato flew around my room");
}, null);

// when
DocumentMapper mapper = createMapperService(indexSettings, mapping).documentMapper();
ParsedDocument doc = mapper.parse(source);

List<IndexableField> fields = doc.rootDoc().getFields("potato");
IndexableFieldType fieldType = fields.get(0).fieldType();

// then
assertThat(fieldType.omitNorms(), is(true));
}

}