-
Notifications
You must be signed in to change notification settings - Fork 25.4k
ESQL - KNN functions with non-pushed down filters #131708
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from 19 commits
7f313f4
2b1a4fa
363e50e
a60aa3d
63c62c7
4440717
dee1e91
6438bc1
21bb1f1
376be41
a24645d
87285f1
aa89a4d
beea012
bf05ddd
9e673f3
0ff8c23
521bff5
40fa387
605f8b0
b11c465
e9d3ba1
8d8b7fb
5d96906
8f1fb3a
b132338
03d4c21
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -9,7 +9,6 @@ | |
|
||
import org.elasticsearch.compute.data.Block; | ||
import org.elasticsearch.compute.data.BlockFactory; | ||
import org.elasticsearch.compute.data.DocVector; | ||
import org.elasticsearch.compute.data.DoubleBlock; | ||
import org.elasticsearch.compute.data.DoubleVector; | ||
import org.elasticsearch.compute.data.Page; | ||
|
@@ -46,9 +45,9 @@ public ScoreOperator(BlockFactory blockFactory, ExpressionScorer scorer, int sco | |
|
||
@Override | ||
protected Page process(Page page) { | ||
assert page.getBlockCount() >= 2 : "Expected at least 2 blocks, got " + page.getBlockCount(); | ||
assert page.getBlock(0).asVector() instanceof DocVector : "Expected a DocVector, got " + page.getBlock(0).asVector(); | ||
assert page.getBlock(1).asVector() instanceof DoubleVector : "Expected a DoubleVector, got " + page.getBlock(1).asVector(); | ||
assert page.getBlockCount() > scoreBlockPosition : "Expected to get a score block in position " + scoreBlockPosition; | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This was an uncovered bug |
||
assert page.getBlock(scoreBlockPosition).asVector() instanceof DoubleVector | ||
: "Expected a DoubleVector as a score block, got " + page.getBlock(scoreBlockPosition).asVector(); | ||
|
||
Block[] blocks = new Block[page.getBlockCount()]; | ||
for (int i = 0; i < page.getBlockCount(); i++) { | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -3,7 +3,7 @@ | |
# top-n query at the shard level | ||
|
||
knnSearch | ||
required_capability: knn_function_v3 | ||
required_capability: knn_function_v4 | ||
|
||
// tag::knn-function[] | ||
from colors metadata _score | ||
|
@@ -30,7 +30,7 @@ chartreuse | [127.0, 255.0, 0.0] | |
; | ||
|
||
knnSearchWithSimilarityOption | ||
required_capability: knn_function_v3 | ||
required_capability: knn_function_v4 | ||
|
||
from colors metadata _score | ||
| where knn(rgb_vector, [255,192,203], 140, {"similarity": 40}) | ||
|
@@ -46,7 +46,7 @@ wheat | [245.0, 222.0, 179.0] | |
; | ||
|
||
knnHybridSearch | ||
required_capability: knn_function_v3 | ||
required_capability: knn_function_v4 | ||
|
||
from colors metadata _score | ||
| where match(color, "blue") or knn(rgb_vector, [65,105,225], 10) | ||
|
@@ -68,7 +68,7 @@ yellow | [255.0, 255.0, 0.0] | |
; | ||
|
||
knnWithPrefilter | ||
required_capability: knn_function_v3 | ||
required_capability: knn_function_v4 | ||
|
||
from colors metadata _score | ||
| where knn(rgb_vector, [128,128,0], 10) and (match(color, "olive") or match(color, "green")) | ||
|
@@ -82,7 +82,7 @@ green | [0.0, 128.0, 0.0] | |
; | ||
|
||
knnWithNegatedPrefilter | ||
required_capability: knn_function_v3 | ||
required_capability: knn_function_v4 | ||
|
||
from colors metadata _score | ||
| where knn(rgb_vector, [128,128,0], 10) and not (match(color, "olive") or match(color, "chocolate")) | ||
|
@@ -105,7 +105,7 @@ orange | [255.0, 165.0, 0.0] | |
; | ||
|
||
knnAfterKeep | ||
required_capability: knn_function_v3 | ||
required_capability: knn_function_v4 | ||
|
||
from colors metadata _score | ||
| keep rgb_vector, color, _score | ||
|
@@ -124,7 +124,7 @@ rgb_vector:dense_vector | |
; | ||
|
||
knnAfterDrop | ||
required_capability: knn_function_v3 | ||
required_capability: knn_function_v4 | ||
|
||
from colors metadata _score | ||
| drop primary | ||
|
@@ -143,7 +143,7 @@ lime | [0.0, 255.0, 0.0] | |
; | ||
|
||
knnAfterEval | ||
required_capability: knn_function_v3 | ||
required_capability: knn_function_v4 | ||
|
||
from colors metadata _score | ||
| eval composed_name = locate(color, " ") > 0 | ||
|
@@ -162,7 +162,7 @@ golden rod | true | |
; | ||
|
||
knnWithConjunction | ||
required_capability: knn_function_v3 | ||
required_capability: knn_function_v4 | ||
|
||
from colors metadata _score | ||
| where knn(rgb_vector, [255,255,238], 10) and hex_code like "#FFF*" | ||
|
@@ -181,7 +181,7 @@ yellow | #FFFF00 | [255.0, 255.0, 0.0] | |
; | ||
|
||
knnWithDisjunctionAndFiltersConjunction | ||
required_capability: knn_function_v3 | ||
required_capability: knn_function_v4 | ||
|
||
from colors metadata _score | ||
| where (knn(rgb_vector, [0,255,255], 140) or knn(rgb_vector, [128, 0, 255], 10)) and primary == true | ||
|
@@ -204,7 +204,7 @@ yellow | [255.0, 255.0, 0.0] | |
; | ||
|
||
knnWithNegationsAndFiltersConjunction | ||
required_capability: knn_function_v3 | ||
required_capability: knn_function_v4 | ||
|
||
from colors metadata _score | ||
| where (knn(rgb_vector, [0,255,255], 140) and not(primary == true and match(color, "blue"))) | ||
|
@@ -227,62 +227,76 @@ azure | [240.0, 255.0, 255.0] | |
; | ||
|
||
knnWithNonPushableConjunction | ||
required_capability: knn_function_v3 | ||
required_capability: knn_function_v4 | ||
|
||
from colors metadata _score | ||
| eval composed_name = locate(color, " ") > 0 | ||
| where knn(rgb_vector, [128,128,0], 140) and composed_name == false | ||
| where knn(rgb_vector, [128,128,0], 10) and composed_name == false | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. We can see the change in action - we no longer need to use a large number for k to maintain semantics, nor to use limit at the end. |
||
| sort _score desc, color asc | ||
| keep color, composed_name | ||
| limit 10 | ||
| keep color | ||
; | ||
|
||
color:text | composed_name:boolean | ||
olive | false | ||
sienna | false | ||
chocolate | false | ||
peru | false | ||
brown | false | ||
firebrick | false | ||
chartreuse | false | ||
gray | false | ||
green | false | ||
maroon | false | ||
color:text | ||
olive | ||
sienna | ||
chocolate | ||
peru | ||
brown | ||
firebrick | ||
chartreuse | ||
green | ||
maroon | ||
; | ||
|
||
testKnnWithNonPushableDisjunctions | ||
required_capability: knn_function_v3 | ||
required_capability: knn_function_v4 | ||
|
||
from colors metadata _score | ||
| where knn(rgb_vector, [128,128,0], 140, {"similarity": 30}) or length(color) > 10 | ||
| where knn(rgb_vector, [128,128,0], 10) or length(color) > 10 | ||
| sort _score desc, color asc | ||
| keep color | ||
; | ||
|
||
color:text | ||
olive | ||
sienna | ||
chocolate | ||
peru | ||
golden rod | ||
brown | ||
firebrick | ||
chartreuse | ||
green | ||
maroon | ||
aqua marine | ||
lemon chiffon | ||
papaya whip | ||
; | ||
|
||
testKnnWithNonPushableDisjunctionsOnComplexExpressions | ||
required_capability: knn_function_v3 | ||
testKnnWithNonPushableConjunctionsOnComplexExpressions | ||
required_capability: knn_function_v4 | ||
|
||
from colors metadata _score | ||
| where (knn(rgb_vector, [128,128,0], 140, {"similarity": 70}) and length(color) < 10) or (knn(rgb_vector, [128,0,128], 140, {"similarity": 60}) and primary == false) | ||
| where knn(rgb_vector, [128,128,0], 10) and length(color) < 7 and knn(rgb_vector, [128,0,128], 10) and primary == false | ||
| sort _score desc, color asc | ||
| keep color, primary | ||
| keep color | ||
; | ||
|
||
color:text | primary:boolean | ||
olive | false | ||
purple | false | ||
indigo | false | ||
color:text | ||
olive | ||
purple | ||
indigo | ||
sienna | ||
brown | ||
peru | ||
maroon | ||
navy | ||
tomato | ||
orange | ||
; | ||
|
||
testKnnInStatsNonPushable | ||
required_capability: knn_function_v3 | ||
required_capability: knn_function_v4 | ||
|
||
from colors | ||
| where length(color) < 10 | ||
|
@@ -294,7 +308,7 @@ c: long | |
; | ||
|
||
testKnnInStatsWithGrouping | ||
required_capability: knn_function_v3 | ||
required_capability: knn_function_v4 | ||
required_capability: full_text_functions_in_stats_where | ||
|
||
from colors | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Made LuceneQueryEvaluator more robust