Skip to content

Commit c80e60a

Browse files
authored
Fix field split in Top K sketch (#111)
1 parent a454bdd commit c80e60a

File tree

2 files changed, with 37 additions and 3 deletions.

src/main/java/com/yahoo/bullet/querying/aggregations/SketchingStrategy.java

Lines changed: 14 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -12,11 +12,10 @@
1212
import com.yahoo.bullet.result.Clip;
1313
import com.yahoo.bullet.result.Meta;
1414

15-
import java.util.Arrays;
15+
import java.util.ArrayList;
1616
import java.util.List;
1717
import java.util.Map;
1818
import java.util.Objects;
19-
import java.util.regex.Pattern;
2019
import java.util.stream.Collectors;
2120
import java.util.stream.Stream;
2221

@@ -111,7 +110,19 @@ String composeField(Stream<String> fields) {
111110
* @return A {@link List} of the fields that this field was made of.
112111
*/
113112
List<String> decomposeField(String field) {
// Removed line: String.split drops trailing empty strings from its result, so a composite
// value that ended in one or more empty pieces came back with fewer entries than it was built from.
114-
return Arrays.asList(field.split(Pattern.quote(separator)));
// Added lines: walk the string with a literal indexOf search so that every piece between
// separators is kept, including empty ones at the start, middle or end.
113+
List<String> fields = new ArrayList<>();
114+
int index = 0;
// NOTE(review): if separator were the empty string, indexOf would keep returning index and
// index would never advance, so this loop would not terminate. Where separator is set is not
// visible here; confirm it is validated as non-empty.
115+
while (true) {
116+
int nextIndex = field.indexOf(separator, index);
117+
if (nextIndex >= 0) {
// Keep the piece before this separator occurrence, then resume scanning just past it.
118+
fields.add(field.substring(index, nextIndex));
119+
index = nextIndex + separator.length();
120+
} else {
// No further separator: whatever remains (possibly the empty string) is the last piece.
121+
fields.add(field.substring(index));
122+
break;
123+
}
124+
}
125+
return fields;
115126
}
116127

117128
private String getMetaKey() {

src/test/java/com/yahoo/bullet/querying/aggregations/FrequentItemsSketchingStrategyTest.java

Lines changed: 23 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -367,4 +367,27 @@ public void testResetting() {
367367
Assert.assertEquals(topK.getRecords(), records);
368368
Assert.assertEquals(topK.getMetadata().asMap(), result.getMeta().asMap());
369369
}
370+
371+
@Test
372+
public void testEmptyValues() {
// Feeds three records whose "A" and "B" values are the empty string in the second position,
// the first position, and both positions, then checks that each one comes back as its own
// result record with both fields present and a count of 1.
373+
FrequentItemsSketchingStrategy topK = makeTopK(asList("A", "B"), 64, 20);
374+
375+
// NOTE(review): String.valueOf on a String literal returns the same string; the wrapper is redundant.
topK.consume(RecordBox.get().add("A", String.valueOf("")).add("B", String.valueOf("_")).getRecord());
376+
topK.consume(RecordBox.get().add("A", String.valueOf("_")).add("B", String.valueOf("")).getRecord());
377+
topK.consume(RecordBox.get().add("A", String.valueOf("")).add("B", String.valueOf("")).getRecord());
378+
379+
Clip result = topK.getResult();
380+
381+
List<BulletRecord> records = result.getRecords();
382+
Assert.assertEquals(records.size(), 3);
383+
for (BulletRecord actual : records) {
// Three fields expected per record: "A", "B" and the count column.
// NOTE(review): only non-nullness of "A" and "B" is asserted, not the exact "" / "_" values.
384+
Assert.assertEquals(actual.fieldCount(), 3);
385+
Assert.assertNotNull(actual.typedGet("A").getValue());
386+
Assert.assertNotNull(actual.typedGet("B").getValue());
387+
Assert.assertEquals(actual.typedGet(COUNT_NAME).getValue(), 1L);
388+
}
389+
390+
// getRecords() and getMetadata() must agree with what getResult() returned.
Assert.assertEquals(topK.getRecords(), records);
391+
Assert.assertEquals(topK.getMetadata().asMap(), result.getMeta().asMap());
392+
}
370393
}

0 commit comments

Comments
 (0)