|
1 | 1 | from __future__ import annotations
|
2 | 2 |
|
3 | 3 | import contextlib
|
4 |
| -from typing import TYPE_CHECKING, Any |
| 4 | +from typing import TYPE_CHECKING, Any, TypeVar |
5 | 5 |
|
6 | 6 | import pyarrow as pa
|
7 | 7 | import pyarrow_hotfix # noqa: F401
|
|
19 | 19 | import polars as pl
|
20 | 20 | import pyarrow.dataset as ds
|
21 | 21 |
|
| 22 | + import ibis |
| 23 | + |
| 24 | + TableOrValue = TypeVar("TableOrValue", bound=ibis.Table | ibis.Value) |
| 25 | + |
22 | 26 |
|
23 | 27 | _from_pyarrow_types = {
|
24 | 28 | pa.int8(): dt.Int8,
|
@@ -384,3 +388,50 @@ def to_pyarrow_dataset(self, schema: Schema) -> ds.Dataset:
|
384 | 388 |
|
def to_polars(self, schema: Schema) -> pa.Table:
    """Always raise: this object does not support conversion to polars.

    Raises
    ------
    com.UnsupportedOperationError
        Unconditionally, carrying ``self.ERROR_MESSAGE``.
    """
    # NOTE(review): the return annotation says ``pa.Table`` although the
    # method is named ``to_polars``; it looks copied from a sibling method.
    # Confirm against the base class before changing it.
    raise com.UnsupportedOperationError(self.ERROR_MESSAGE)
|
| 391 | + |
| 392 | + |
def to_pa_compatible(table_or_val: TableOrValue) -> TableOrValue:
    """Return an Ibis table or value whose types PyArrow can represent.

    Some backend types (uuid, for example) end up as a different type on the
    PyArrow side (a string, in the uuid case). Because that conversion is
    going to happen anyway, the cast is added to the Ibis expression itself
    so that it is carried out by the backend, which is a performance gain.

    It also sidesteps failures when materializing to PyArrow a type that
    PyArrow does not support.
    See https://github.com/ibis-project/ibis/issues/8532

    Parameters
    ----------
    table_or_val
        An ``ibis.Table`` or an ``ibis.Value``.

    Returns
    -------
    The input unchanged when its types already round-trip through PyArrow,
    otherwise the input cast to the PyArrow-compatible types.

    Raises
    ------
    TypeError
        If ``table_or_val`` is neither an ``ibis.Table`` nor an ``ibis.Value``.
    """
    # Imported inside the function, as in the original implementation,
    # rather than at module import time.
    import ibis

    # Tables are checked first, then values; anything else is rejected.
    if isinstance(table_or_val, ibis.Table):
        return _to_pa_compatible_table(table_or_val)
    if isinstance(table_or_val, ibis.Value):
        return _to_pa_compatible_value(table_or_val)
    raise TypeError(f"Unsupported type: {type(table_or_val)}")
| 412 | + |
| 413 | + |
def _to_pa_compatible_value(val: ibis.Value) -> ibis.Value:
    """Cast ``val`` to its PyArrow round-trip type when the two differ.

    The value's Ibis type is converted to a PyArrow type and back again.
    If the mapping is bijective for this type, the result equals the
    original type and ``val`` is returned untouched. Otherwise ``val`` is
    cast to the round-tripped type, i.e. the type Ibis will see once the
    data has gone through PyArrow.
    """
    source_dtype = val.type()
    # ibis -> pyarrow -> ibis round trip to test bijectivity of the mapping.
    arrow_dtype = PyArrowType.from_ibis(source_dtype)
    round_tripped_dtype = PyArrowType.to_ibis(arrow_dtype)
    # Only add a cast to the expression when the types actually differ.
    return val.cast(round_tripped_dtype) if source_dtype != round_tripped_dtype else val
| 424 | + |
| 425 | + |
def _to_pa_compatible_table(table: ibis.Table) -> ibis.Table:
    """Cast ``table`` to its PyArrow round-trip schema when the two differ.

    The table's Ibis schema is converted to a PyArrow schema and back again.
    If the mapping is bijective for every column type, the result equals the
    original schema and ``table`` is returned untouched. Otherwise ``table``
    is cast to the round-tripped schema, i.e. the schema Ibis will see once
    the data has gone through PyArrow.
    """
    source_schema = table.schema()
    # ibis -> pyarrow -> ibis round trip to test bijectivity of the mapping.
    arrow_schema = PyArrowSchema.from_ibis(source_schema)
    round_tripped_schema = PyArrowSchema.to_ibis(arrow_schema)
    # Only add a cast to the expression when the schemas actually differ.
    return (
        table.cast(round_tripped_schema)
        if source_schema != round_tripped_schema
        else table
    )
0 commit comments