Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions docs/changelog/131559.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
pr: 131559
summary: Lookup Join on Multiple Columns POC WIP
area: ES|QL
type: enhancement
issues: []
3 changes: 2 additions & 1 deletion docs/reference/query-languages/esql/esql-lookup-join.md
Original file line number Diff line number Diff line change
Expand Up @@ -202,5 +202,6 @@ The following are the current limitations with `LOOKUP JOIN`:
* Currently, only matching on equality is supported.
* `LOOKUP JOIN` can only use a single match field and a single index. Wildcards are not supported.
* Aliases, datemath, and datastreams are supported, as long as the index pattern matches a single concrete index {applies_to}`stack: ga 9.1.0`.
* Limitation on matching on a single field is removed. You can use a comma separated list of fields in the `ON` clause {applies_to}`stack: ga 9.2.0`.
* The name of the match field in `LOOKUP JOIN lu_idx ON match_field` must match an existing field in the query. This may require `RENAME`s or `EVAL`s to achieve.
* The query will circuit break if there are too many matching documents in the lookup index, or if the documents are too large. More precisely, `LOOKUP JOIN` works in batches of, normally, about 10,000 rows; a large amount of heap space is needed if the matching documents from the lookup index for a batch are multiple megabytes or larger. This is roughly the same as for `ENRICH`.
* The query will circuit break if there are too many matching documents in the lookup index, or if the documents are too large. More precisely, `LOOKUP JOIN` works in batches of, normally, about 10,000 rows; a large amount of heap space is needed if the matching documents from the lookup index for a batch are multiple megabytes or larger. This is roughly the same as for `ENRICH`.
Original file line number Diff line number Diff line change
Expand Up @@ -352,6 +352,7 @@ static TransportVersion def(int id) {
public static final TransportVersion ESQL_TOPN_TIMINGS = def(9_128_0_00);
public static final TransportVersion NODE_WEIGHTS_ADDED_TO_NODE_BALANCE_STATS = def(9_129_0_00);
public static final TransportVersion RERANK_SNIPPETS = def(9_130_0_00);
public static final TransportVersion ESQL_LOOKUP_JOIN_ON_MANY_FIELDS = def(9_131_0_00);

/*
* STOP! READ THIS FIRST! No, really,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@
*/
public final class EnrichQuerySourceOperator extends SourceOperator {
private final BlockFactory blockFactory;
private final QueryList queryList;
private final LookupEnrichQueryGenerator queryList;
private int queryPosition = -1;
private final ShardContext shardContext;
private final IndexReader indexReader;
Expand All @@ -51,7 +51,7 @@ public final class EnrichQuerySourceOperator extends SourceOperator {
public EnrichQuerySourceOperator(
BlockFactory blockFactory,
int maxPageSize,
QueryList queryList,
LookupEnrichQueryGenerator queryList,
ShardContext shardContext,
Warnings warnings
) {
Expand Down Expand Up @@ -159,7 +159,7 @@ Page buildPage(int positions, IntVector.Builder positionsBuilder, IntVector.Buil
return page;
}

private Query nextQuery() {
private Query nextQuery() throws IOException {
++queryPosition;
while (isFinished() == false) {
Query query = queryList.getQuery(queryPosition);
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/

package org.elasticsearch.compute.operator.lookup;

import org.apache.lucene.search.Query;
import org.elasticsearch.core.Nullable;

import java.io.IOException;

/**
* An interface to generates queries for the lookup and enrich operators.
* This interface is used to retrieve queries based on a position index.
*/
public interface LookupEnrichQueryGenerator {

/**
* Returns the query at the given position.
*/
@Nullable
Query getQuery(int position) throws IOException;

/**
* Returns the number of queries in this generator
*/
int getPositionCount();

}
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@
/**
* Generates a list of Lucene queries based on the input block.
*/
public abstract class QueryList {
public abstract class QueryList implements LookupEnrichQueryGenerator {
protected final SearchExecutionContext searchExecutionContext;
protected final AliasFilter aliasFilter;
protected final MappedFieldType field;
Expand All @@ -74,7 +74,8 @@ protected QueryList(
/**
* Returns the number of positions in this query list
*/
int getPositionCount() {
@Override
public int getPositionCount() {
return block.getPositionCount();
}

Expand All @@ -87,7 +88,8 @@ int getPositionCount() {
*/
public abstract QueryList onlySingleValues(Warnings warnings, String multiValueWarningMessage);

final Query getQuery(int position) {
@Override
public final Query getQuery(int position) {
final int valueCount = block.getValueCount(position);
if (onlySingleValueParams != null && valueCount != 1) {
if (valueCount > 1) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,16 @@ public class CsvTestsDataLoader {
private static final TestDataset HOSTS = new TestDataset("hosts");
private static final TestDataset APPS = new TestDataset("apps");
private static final TestDataset APPS_SHORT = APPS.withIndex("apps_short").withTypeMapping(Map.of("id", "short"));
private static final TestDataset MULTI_COLUMN_JOINABLE = new TestDataset(
"multi_column_joinable",
"mapping-multi_column_joinable.json",
"multi_column_joinable.csv"
);
private static final TestDataset MULTI_COLUMN_JOINABLE_LOOKUP = new TestDataset(
"multi_column_joinable_lookup",
"mapping-multi_column_joinable2.json",
"multi_column_joinable2.csv"
).withSetting("lookup-settings.json");
private static final TestDataset LANGUAGES = new TestDataset("languages");
private static final TestDataset LANGUAGES_LOOKUP = LANGUAGES.withIndex("languages_lookup").withSetting("lookup-settings.json");
private static final TestDataset LANGUAGES_LOOKUP_NON_UNIQUE_KEY = LANGUAGES_LOOKUP.withIndex("languages_lookup_non_unique_key")
Expand Down Expand Up @@ -213,7 +223,9 @@ public class CsvTestsDataLoader {
Map.entry(LOGS.indexName, LOGS),
Map.entry(MV_TEXT.indexName, MV_TEXT),
Map.entry(DENSE_VECTOR.indexName, DENSE_VECTOR),
Map.entry(COLORS.indexName, COLORS)
Map.entry(COLORS.indexName, COLORS),
Map.entry(MULTI_COLUMN_JOINABLE.indexName, MULTI_COLUMN_JOINABLE),
Map.entry(MULTI_COLUMN_JOINABLE_LOOKUP.indexName, MULTI_COLUMN_JOINABLE_LOOKUP)
);

private static final EnrichConfig LANGUAGES_ENRICH = new EnrichConfig("languages_policy", "enrich-policy-languages.json");
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
id_int,name_str,is_active_bool,ip_addr,extra1,extra2
1,Alice,true,192.168.1.1,foo,100
2,Bob,false,192.168.1.2,bar,200
3,Charlie,true,192.168.1.3,baz,300
4,David,false,192.168.1.4,qux,400
5,Eve,true,192.168.1.5,quux,500
6,,true,192.168.1.6,corge,600
7,Grace,false,,grault,700
8,Hank,true,192.168.1.8,garply,800
9,Ivy,false,192.168.1.9,waldo,900
10,John,true,192.168.1.10,fred,1000
,Kate,false,192.168.1.11,plugh,1100
12,Liam,true,192.168.1.12,xyzzy,1200
13,Mia,false,192.168.1.13,thud,1300
14,Nina,true,192.168.1.14,foo2,1400
15,Oscar,false,192.168.1.15,bar2,1500
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
id_int,name_str,is_active_bool,ip_addr,other1,other2
1,Alice,true,192.168.1.1,alpha,1000
1,Alice,true,192.168.1.2,beta,2000
2,Bob,false,192.168.1.3,gamma,3000
3,Charlie,true,192.168.1.3,delta,4000
3,Charlie,false,192.168.1.3,epsilon,5000
4,David,false,192.168.1.4,zeta,6000
5,Eve,true,192.168.1.5,eta,7000
5,Eve,true,192.168.1.5,theta,8000
6,,true,192.168.1.6,iota,9000
7,Grace,false,,kappa,10000
8,Hank,true,192.168.1.8,lambda,11000
,Kate,false,192.168.1.11,mu,12000
12,Liam,true,192.168.1.12,nu,13000
13,Mia,false,192.168.1.13,xi,14000
14,Nina,true,192.168.1.14,omicron,15000
16,Paul,true,192.168.1.16,pi,16000
Loading
Loading