Skip to content

Special field visitor for _ignored_source #131885

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Draft
wants to merge 2 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@

import org.apache.lucene.index.FieldInfo;
import org.elasticsearch.index.mapper.IgnoredFieldMapper;
import org.elasticsearch.index.mapper.IgnoredSourceFieldMapper;

import java.util.HashSet;
import java.util.Set;
Expand Down Expand Up @@ -50,6 +51,11 @@ public Status needsField(FieldInfo fieldInfo) {
if (fields.contains(fieldInfo.name)) {
return Status.YES;
}

if (IgnoredSourceFieldMapper.NAME.equals(fieldInfo.name)) {
return Status.YES;
}

return Status.NO;
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,142 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the "Elastic License
* 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
* Public License v 1"; you may not use this file except in compliance with, at
* your election, the "Elastic License 2.0", the "GNU Affero General Public
* License v3.0 only", or the "Server Side Public License, v 1".
*/

package org.elasticsearch.index.fieldvisitor;

import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.StoredFieldVisitor;
import org.elasticsearch.common.CheckedBiConsumer;
import org.elasticsearch.common.bytes.BytesReference;
import org.elasticsearch.common.lucene.index.SequentialStoredFieldsLeafReader;
import org.elasticsearch.index.mapper.FallbackSyntheticSourceBlockLoader;
import org.elasticsearch.index.mapper.IgnoredSourceFieldMapper;
import org.elasticsearch.search.fetch.StoredFieldsSpec;

import java.io.IOException;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;

/**
 * A {@link StoredFieldLoader} that loads only entries of the {@code _ignored_source} stored
 * field, filtered down to the field paths the caller actually asked for. Used as a fast path
 * when a {@link StoredFieldsSpec} requires nothing but ignored-source sub-fields
 * (see {@link #supports(StoredFieldsSpec)}).
 */
class IgnoredSourceFieldLoader extends StoredFieldLoader {

    /** Prefix used to encode "sub-field of _ignored_source" requests in a stored-fields spec. */
    private static final String PREFIX = IgnoredSourceFieldMapper.NAME + ".";

    /** The requested fields plus all their parent paths; only matching ignored-source entries are decoded. */
    final Set<String> potentialFieldsInIgnoreSource;

    IgnoredSourceFieldLoader(StoredFieldsSpec spec) {
        Set<String> potentialFieldsInIgnoreSource = new HashSet<>();
        for (String requiredStoredField : spec.requiredStoredFields()) {
            if (requiredStoredField.startsWith(PREFIX)) {
                // Strip "_ignored_source." including the dot. Stripping only NAME (as before)
                // left a leading '.' on the field name, which put a stray "" entry in the set.
                String fieldName = requiredStoredField.substring(PREFIX.length());
                potentialFieldsInIgnoreSource.addAll(FallbackSyntheticSourceBlockLoader.splitIntoFieldPaths(fieldName));
            }
        }
        this.potentialFieldsInIgnoreSource = potentialFieldsInIgnoreSource;
    }

    @Override
    public LeafStoredFieldLoader getLoader(LeafReaderContext ctx, int[] docs) throws IOException {
        var reader = sequentialReader(ctx);
        var visitor = new SFV(potentialFieldsInIgnoreSource);
        return new LeafStoredFieldLoader() {

            // Last doc id loaded into the visitor; avoids re-reading on repeated advanceTo calls.
            private int doc = -1;

            @Override
            public void advanceTo(int doc) throws IOException {
                if (doc != this.doc) {
                    visitor.reset();
                    reader.accept(doc, visitor);
                    this.doc = doc;
                }
            }

            @Override
            public BytesReference source() {
                return null; // this loader never loads _source, see supports()
            }

            @Override
            public String id() {
                return null; // metadata fields are not supported, see supports()
            }

            @Override
            public String routing() {
                return null;
            }

            @Override
            public Map<String, List<Object>> storedFields() {
                // Copy so callers holding on to the result don't observe the next advanceTo() reset.
                return Map.of(IgnoredSourceFieldMapper.NAME, List.copyOf(visitor.values));
            }
        };
    }

    @Override
    public List<String> fieldsToLoad() {
        return List.of(IgnoredSourceFieldMapper.NAME);
    }

    /**
     * A {@link StoredFieldVisitor} that accepts only {@code _ignored_source} and materializes an
     * entry only when its name matches one of the requested field paths.
     */
    static class SFV extends StoredFieldVisitor {

        final List<Object> values = new ArrayList<>();
        final Set<String> potentialFieldsInIgnoreSource;

        SFV(Set<String> potentialFieldsInIgnoreSource) {
            this.potentialFieldsInIgnoreSource = potentialFieldsInIgnoreSource;
        }

        @Override
        public Status needsField(FieldInfo fieldInfo) throws IOException {
            return IgnoredSourceFieldMapper.NAME.equals(fieldInfo.name) ? Status.YES : Status.NO;
        }

        @Override
        public void binaryField(FieldInfo fieldInfo, byte[] value) throws IOException {
            // decodeIfMatch returns null for entries whose name was not requested,
            // skipping the cost of decoding their values.
            var result = IgnoredSourceFieldMapper.decodeIfMatch(value, potentialFieldsInIgnoreSource);
            if (result != null) {
                values.add(result);
            }
        }

        /** Clears accumulated values before loading the next document. */
        void reset() {
            values.clear();
        }
    }

    /**
     * Whether this specialized loader can serve {@code spec}: no {@code _source}, no metadata
     * fields, and every required stored field is an {@code _ignored_source.<field>} request.
     */
    static boolean supports(StoredFieldsSpec spec) {
        if (spec.requiresSource() || spec.requiresMetadata()) {
            return false;
        }
        for (String fieldName : spec.requiredStoredFields()) {
            // Require the trailing dot: a request for the raw _ignored_source field (no sub-field
            // suffix) must fall back to the generic loader, which returns all entries — this
            // loader would silently filter every entry out.
            if (fieldName.startsWith(PREFIX) == false) {
                return false;
            }
        }
        return true;
    }

    // TODO: use provided one
    private static CheckedBiConsumer<Integer, StoredFieldVisitor, IOException> sequentialReader(LeafReaderContext ctx) throws IOException {
        LeafReader leafReader = ctx.reader();
        if (leafReader instanceof SequentialStoredFieldsLeafReader lf) {
            // Prefer the merge-optimized sequential reader when the leaf supports it.
            return lf.getSequentialStoredFieldsReader()::document;
        }
        return leafReader.storedFields()::document;
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,9 @@ public static StoredFieldLoader fromSpec(StoredFieldsSpec spec) {
if (spec.noRequirements()) {
return StoredFieldLoader.empty();
}
if (IgnoredSourceFieldLoader.supports(spec)) {
return new IgnoredSourceFieldLoader(spec);
}
return create(spec.requiresSource(), spec.requiredStoredFields());
}

Expand Down Expand Up @@ -91,6 +94,10 @@ public static StoredFieldLoader fromSpecSequential(StoredFieldsSpec spec) {
if (spec.noRequirements()) {
return StoredFieldLoader.empty();
}
if (IgnoredSourceFieldLoader.supports(spec)) {
return new IgnoredSourceFieldLoader(spec);
}

List<String> fieldsToLoad = fieldsToLoad(spec.requiresSource(), spec.requiredStoredFields());
return new StoredFieldLoader() {
@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -39,10 +39,12 @@
public abstract class FallbackSyntheticSourceBlockLoader implements BlockLoader {
private final Reader<?> reader;
private final String fieldName;
private final Set<String> fieldPaths;

protected FallbackSyntheticSourceBlockLoader(Reader<?> reader, String fieldName) {
this.reader = reader;
this.fieldName = fieldName;
this.fieldPaths = splitIntoFieldPaths(fieldName);
}

@Override
Expand All @@ -52,12 +54,12 @@ public ColumnAtATimeReader columnAtATimeReader(LeafReaderContext context) throws

@Override
public RowStrideReader rowStrideReader(LeafReaderContext context) throws IOException {
return new IgnoredSourceRowStrideReader<>(fieldName, reader);
return new IgnoredSourceRowStrideReader<>(fieldName, reader, fieldPaths);
}

@Override
public StoredFieldsSpec rowStrideStoredFieldSpec() {
return new StoredFieldsSpec(false, false, Set.of(IgnoredSourceFieldMapper.NAME));
return new StoredFieldsSpec(false, false, Set.of(IgnoredSourceFieldMapper.NAME + "." + fieldName));
}

@Override
Expand All @@ -70,30 +72,29 @@ public SortedSetDocValues ordinals(LeafReaderContext context) throws IOException
throw new UnsupportedOperationException();
}

private static class IgnoredSourceRowStrideReader<T> implements RowStrideReader {
// Contains name of the field and all its parents
private final Set<String> fieldNames;
/**
 * Splits a dotted field name into the set of all its path prefixes, plus the synthetic
 * root path {@code "_doc"}. For example {@code "a.b.c"} yields
 * {@code {"_doc", "a", "a.b", "a.b.c"}}.
 */
public static Set<String> splitIntoFieldPaths(String fieldName) {
    Set<String> paths = new HashSet<>();
    paths.add("_doc");
    String prefix = "";
    for (String part : fieldName.split("\\.")) {
        // Grow the prefix one path segment at a time, recording each intermediate parent.
        prefix = prefix.isEmpty() ? part : prefix + '.' + part;
        paths.add(prefix);
    }
    return paths;
}

private static final class IgnoredSourceRowStrideReader<T> implements RowStrideReader {
private final String fieldName;
private final Reader<T> reader;
private final Set<String> fieldPaths;

IgnoredSourceRowStrideReader(String fieldName, Reader<T> reader) {
private IgnoredSourceRowStrideReader(String fieldName, Reader<T> reader, Set<String> fieldPaths) {
this.fieldName = fieldName;
this.reader = reader;
this.fieldNames = new HashSet<>() {
{
add("_doc");
}
};

var current = new StringBuilder();
for (String part : fieldName.split("\\.")) {
if (current.isEmpty() == false) {
current.append('.');
}
current.append(part);
fieldNames.add(current.toString());
}

this.fieldPaths = fieldPaths;
}

@Override
Expand All @@ -105,10 +106,12 @@ public void read(int docId, StoredFields storedFields, Builder builder) throws I
}

Map<String, List<IgnoredSourceFieldMapper.NameValue>> valuesForFieldAndParents = new HashMap<>();

for (Object value : ignoredSource) {
IgnoredSourceFieldMapper.NameValue nameValue = IgnoredSourceFieldMapper.decode(value);
if (fieldNames.contains(nameValue.name())) {
IgnoredSourceFieldMapper.NameValue nameValue = (value instanceof IgnoredSourceFieldMapper.NameValue nVal)
? nVal
: IgnoredSourceFieldMapper.decode(value);

if (fieldPaths.contains(nameValue.name())) {
valuesForFieldAndParents.computeIfAbsent(nameValue.name(), k -> new ArrayList<>()).add(nameValue);
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -186,6 +186,20 @@ static NameValue decode(Object field) {
return new NameValue(name, parentOffset, value, null);
}

/**
 * Decodes only the field name from an encoded {@code _ignored_source} entry and returns a
 * {@link NameValue} when that name is contained in {@code potentialFieldsInIgnoreSource};
 * returns {@code null} otherwise, so callers can skip unrequested entries cheaply.
 * NOTE(review): assumes the same byte layout as {@code decode(Object)} — a 4-byte little-endian
 * int header packing name length and parent offset, followed by the UTF-8 name and then the
 * value bytes. Confirm the two decoders stay in sync if the encoding ever changes.
 */
public static NameValue decodeIfMatch(byte[] bytes, Set<String> potentialFieldsInIgnoreSource) {
// Header packs both values: encodedSize = parentOffset * PARENT_OFFSET_IN_NAME_OFFSET + nameSize.
int encodedSize = ByteUtils.readIntLE(bytes, 0);
int nameSize = encodedSize % PARENT_OFFSET_IN_NAME_OFFSET;
int parentOffset = encodedSize / PARENT_OFFSET_IN_NAME_OFFSET;

String name = new String(bytes, 4, nameSize, StandardCharsets.UTF_8);
if (potentialFieldsInIgnoreSource.contains(name)) {
// The value spans the remainder of the array after the 4-byte header and the name.
BytesRef value = new BytesRef(bytes, 4 + nameSize, bytes.length - nameSize - 4);
return new NameValue(name, parentOffset, value, null);
} else {
return null;
}
}

// In rare cases decoding values stored in this field can fail leading to entire source
// not being available.
// We would like to have an option to lose some values in synthetic source
Expand Down
Loading