Skip to content

Commit 7029b19

Browse files
committed
fix: make column.unnest().topk() work
Also simplifies the implementation of Column.topk() and makes it re-use the implementation of Table.topk() This does NOT fix `column.unnest().topk(by=<anything besides None>)`. This is a rare case so I decided to just punt on implementing it
1 parent 7671bda commit 7029b19

File tree

2 files changed

+38
-9
lines changed

2 files changed

+38
-9
lines changed

ibis/backends/tests/test_generic.py

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
import ibis.expr.datatypes as dt
1717
import ibis.selectors as s
1818
from ibis import _
19+
from ibis.backends.tests.conftest import NO_ARRAY_SUPPORT
1920
from ibis.backends.tests.errors import (
2021
ClickHouseDatabaseError,
2122
ExaQueryError,
@@ -2390,6 +2391,38 @@ def test_topk_counts_null(con):
23902391
assert result[0].as_py() == 1
23912392

23922393

2394+
@NO_ARRAY_SUPPORT
2395+
def test_topk_unnest_count(con: ibis.BaseBackend):
2396+
t = con.create_table(
2397+
ibis.util.gen_name("topk_unnest_count"),
2398+
{"x": [[1, 2, 3], [1, 2, None], []]},
2399+
temp=True,
2400+
)
2401+
tk = t.x.unnest().topk(name="n")
2402+
n_1s = tk.filter(_.x == 1)["n"].as_scalar()
2403+
result = con.to_pyarrow(n_1s).as_py()
2404+
assert result == 2
2405+
2406+
tk = t.x.unnest().topk()
2407+
n_1s = tk.filter(_.x == 1)["x_count"].as_scalar()
2408+
result = con.to_pyarrow(n_1s).as_py()
2409+
assert result == 2
2410+
2411+
2412+
@pytest.mark.xfail(reason="The unnest is not placed in the right place in the query")
2413+
def test_topk_unnest_max(con: ibis.BaseBackend):
2414+
t = con.create_table(
2415+
ibis.util.gen_name("topk_counts_unnest"),
2416+
{"x": [[1, 2, 3], [1, 2, None], []]},
2417+
temp=True,
2418+
)
2419+
v = t.x.unnest()
2420+
tk = v.topk(by=v.max(), name="n")
2421+
n_1s = tk.filter(_.x == 1)["n"].as_scalar()
2422+
result = con.to_pyarrow(n_1s).as_py()
2423+
assert result == 1
2424+
2425+
23932426
@pytest.mark.notyet(
23942427
"clickhouse",
23952428
raises=AssertionError,

ibis/expr/types/generic.py

Lines changed: 5 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -2238,18 +2238,14 @@ def topk(
22382238
"""
22392239
from ibis.expr.types.relations import bind
22402240

2241+
if by is None:
2242+
return self.as_table().topk(k=k, name=name)
2243+
22412244
try:
2242-
(table,) = self.op().relations
2245+
(table_op,) = self.op().relations
22432246
except ValueError:
22442247
raise com.IbisTypeError("TopK must depend on exactly one table.")
2245-
2246-
table = table.to_expr()
2247-
2248-
if by is None and name is None:
2249-
# if `by` is something more complex, the _count doesn't make sense.
2250-
name = f"{self.get_name()}_count"
2251-
if by is None:
2252-
by = lambda t: t.count()
2248+
table: ibis.Table = table_op.to_expr()
22532249

22542250
(metric,) = bind(table, by)
22552251
if name is not None:

0 commit comments

Comments
 (0)