Skip to content

Commit 2f32a16

Browse files
committed
fix(postgres): support uuid.to_pyarrow()
1 parent 1292bb1 commit 2f32a16

File tree

3 files changed

+61
-1
lines changed

3 files changed

+61
-1
lines changed

ibis/backends/postgres/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@
2626
from ibis.backends import CanCreateDatabase, CanListCatalog, PyArrowExampleLoader
2727
from ibis.backends.sql import SQLBackend
2828
from ibis.backends.sql.compilers.base import TRUE, C, ColGen
29+
from ibis.formats.pyarrow import to_pa_compatible
2930

3031
if TYPE_CHECKING:
3132
from collections.abc import Callable, Mapping
@@ -808,6 +809,7 @@ def _batches(self: Self, *, schema: pa.Schema, query: str):
808809

809810
self._run_pre_execute_hooks(expr)
810811

812+
expr = to_pa_compatible(expr)
811813
schema = expr.as_table().schema().to_pyarrow()
812814
query = self.compile(expr, limit=limit, params=params)
813815
return pa.RecordBatchReader.from_batches(

ibis/backends/tests/test_uuid.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -67,3 +67,10 @@ def test_uuid_unique_each_row(con):
6767
con.tables.functional_alltypes.mutate(uuid=ibis.uuid()).limit(2).uuid.nunique()
6868
)
6969
assert expr.execute() == 2
70+
71+
72+
def test_uuid_pyarrow(con):
    """Check that a UUID literal materialized via ``con.to_pyarrow`` equals
    the PyArrow scalar built from the same raw UUID string.
    """
    # Skip (rather than fail) when pyarrow is not installed.
    pa = pytest.importorskip("pyarrow")

    uuid_literal = ibis.literal(RAW_TEST_UUID, type=dt.uuid)
    expected = pa.scalar(RAW_TEST_UUID)

    actual = con.to_pyarrow(uuid_literal)
    assert actual == expected

ibis/formats/pyarrow.py

Lines changed: 52 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
from __future__ import annotations
22

33
import contextlib
4-
from typing import TYPE_CHECKING, Any
4+
from typing import TYPE_CHECKING, Any, TypeVar
55

66
import pyarrow as pa
77
import pyarrow_hotfix # noqa: F401
@@ -19,6 +19,10 @@
1919
import polars as pl
2020
import pyarrow.dataset as ds
2121

22+
import ibis
23+
24+
TableOrValue = TypeVar("TableOrValue", bound=ibis.Table | ibis.Value)
25+
2226

2327
_from_pyarrow_types = {
2428
pa.int8(): dt.Int8,
@@ -384,3 +388,50 @@ def to_pyarrow_dataset(self, schema: Schema) -> ds.Dataset:
384388

385389
def to_polars(self, schema: Schema) -> pa.Table:
    """Always raise ``UnsupportedOperationError`` carrying ``self.ERROR_MESSAGE``."""
    message = self.ERROR_MESSAGE
    raise com.UnsupportedOperationError(message)
391+
392+
393+
def to_pa_compatible(table_or_val: TableOrValue) -> TableOrValue:
    """Cast an Ibis table or value, on the backend, to PyArrow-compatible types.

    Some backend types (uuid, for example) are not supported by PyArrow and
    are represented as a string on the PyArrow side. Since that cast is going
    to happen anyway, it is pushed into the backend expression instead. This
    is a performance gain, and it also avoids trouble when materializing such
    types to PyArrow. See https://github.com/ibis-project/ibis/issues/8532

    Parameters
    ----------
    table_or_val
        The Ibis table or value expression to make PyArrow compatible.

    Returns
    -------
    TableOrValue
        The same kind of expression that was passed in, with a cast applied
        only when its type(s) differ from the PyArrow-compatible ones.

    Raises
    ------
    TypeError
        If `table_or_val` is neither an `ibis.Table` nor an `ibis.Value`.
    """
    import ibis

    # Tables are checked first, then values; anything else is rejected.
    if isinstance(table_or_val, ibis.Table):
        return _to_pa_compatible_table(table_or_val)
    if isinstance(table_or_val, ibis.Value):
        return _to_pa_compatible_value(table_or_val)
    raise TypeError(f"Unsupported type: {type(table_or_val)}")
412+
413+
414+
def _to_pa_compatible_value(val: ibis.Value) -> ibis.Value:
    """Return `val`, cast to its PyArrow-compatible type when the two differ.

    The value's Ibis type is mapped to PyArrow and back again. If that round
    trip yields a different Ibis type, `val` is cast to the round-tripped
    type so the cast happens server side; otherwise `val` is returned as is.
    """
    source_type = val.type()

    # Round-trip ibis -> pyarrow -> ibis: the mapping is not one-to-one for
    # every type, so the result tells us which type PyArrow will hand back.
    arrow_type = PyArrowType.from_ibis(source_type)
    round_tripped_type = PyArrowType.to_ibis(arrow_type)

    # Only cast when the round trip actually changed the type.
    return val.cast(round_tripped_type) if source_type != round_tripped_type else val
424+
425+
426+
def _to_pa_compatible_table(table: ibis.Table) -> ibis.Table:
    """Return `table`, cast to its PyArrow-compatible schema when the two differ.

    The table's Ibis schema is mapped to PyArrow and back again. If that
    round trip yields a different Ibis schema, `table` is cast to the
    round-tripped schema so the cast happens server side; otherwise `table`
    is returned as is.
    """
    source_schema = table.schema()

    # Round-trip ibis -> pyarrow -> ibis: the mapping is not one-to-one for
    # every type, so the result tells us which schema PyArrow will hand back.
    arrow_schema = PyArrowSchema.from_ibis(source_schema)
    round_tripped_schema = PyArrowSchema.to_ibis(arrow_schema)

    # Only cast when the round trip actually changed the schema.
    return (
        table.cast(round_tripped_schema)
        if source_schema != round_tripped_schema
        else table
    )

0 commit comments

Comments
 (0)