-
Notifications
You must be signed in to change notification settings - Fork 11
Move to adaptor backend #298
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
isabelizimm
merged 23 commits into
rstudio:main
from
nathanjmcdougall:feature/move-to-adaptor-backend
Jun 4, 2025
Merged
Changes from all commits
Commits
Show all changes
23 commits
Select commit
Hold shift + click to select a range
9dd8beb
Support adaptor in prepare_pin_version
nathanjmcdougall 040da5e
Use adaptor in save_data
nathanjmcdougall 4ba393d
Use adaptor for default_title
nathanjmcdougall 7898ce7
underscore prefix for _adaptors.py; abstracting df_type in default_title
nathanjmcdougall 4a3ea01
Removing duplication in _obj_name definition
nathanjmcdougall 007ad3a
Use adaptor in _create_meta
nathanjmcdougall d577b02
Pass pyright
nathanjmcdougall 3aaabbb
Fix broken import
nathanjmcdougall 56c3285
Refactoring type hints to avoid use of Self
nathanjmcdougall 0171d72
Remove singleton Union
nathanjmcdougall fe6092f
Add databackend as a dependency
nathanjmcdougall 1289134
Merge branch 'main' into feature/move-to-adaptor-backend
nathanjmcdougall 1d5c47f
dev: add ruff to pyproject.toml
machow d0fa9c9
feat: allow save_data to accept an Adaptor
machow 81f6779
Remove unnecessary underscores
nathanjmcdougall 1540500
Remove misleading/unnecessary ClassVar declaration
nathanjmcdougall dd49569
Merge branch 'feature/move-to-adaptor-backend' of https://github.com/…
nathanjmcdougall daa4239
Separate write_json from to_json (CQS)
nathanjmcdougall f11141a
Move calls to create_adapter to hide them at a lower level
nathanjmcdougall 13d356e
Add some tests
nathanjmcdougall 82ba58a
Merge branch 'rstudio:main' into feature/move-to-adaptor-backend
nathanjmcdougall 18818f6
Use backported typing_extensions.TypeAlias for Python 3.9
nathanjmcdougall dc683dd
add typing_extensions
isabelizimm File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,155 @@ | ||
from __future__ import annotations | ||
|
||
import json | ||
from abc import abstractmethod | ||
from typing import TYPE_CHECKING, Any, ClassVar, overload | ||
|
||
from databackend import AbstractBackend | ||
from typing_extensions import TypeAlias | ||
|
||
if TYPE_CHECKING: | ||
import pandas as pd | ||
|
||
PandasDataFrame: TypeAlias = pd.DataFrame | ||
DataFrame: TypeAlias = PandasDataFrame | ||
|
||
|
||
class AbstractPandasFrame(AbstractBackend):
    """Stand-in type for ``isinstance`` checks against pandas DataFrames.

    ``create_adaptor`` tests objects against this class rather than against
    ``pandas.DataFrame`` directly.
    """

    # (module name, class name) of the concrete class this backend stands for.
    # NOTE(review): presumably databackend resolves this pair lazily so pandas
    # need not be imported here -- confirm against the databackend docs.
    _backends = [
        ("pandas", "DataFrame"),
    ]


# Generic "any supported dataframe backend" alias; pandas is the only one so far.
AbstractDF: TypeAlias = AbstractPandasFrame
|
||
|
||
class Adaptor:
    """Wrap an arbitrary object and expose the hooks used to serialize it.

    The base implementation supports JSON and joblib output only; the tabular
    writers (CSV, Parquet, Feather) raise ``NotImplementedError`` and are meant
    to be overridden by subclasses that wrap dataframe-like objects.
    """

    def __init__(self, data: Any) -> None:
        """Store *data* as the wrapped object."""
        self._d = data

    def write_json(self, file: str) -> None:
        """Write the result of :meth:`to_json` to the path *file*."""
        # Be explicit about the encoding instead of relying on the locale
        # default, so the file is UTF-8 on every platform.
        with open(file, "w", encoding="utf-8") as f:
            f.write(self.to_json())

    def to_json(self) -> str:
        """Return the wrapped object serialized with ``json.dumps``."""
        return json.dumps(self._d)

    def write_joblib(self, file: str) -> None:
        """Dump the wrapped object to *file* with ``joblib.dump``."""
        # Imported lazily so joblib is only needed when this writer is used.
        import joblib

        joblib.dump(self._d, file)

    def write_csv(self, file: str) -> None:
        """Not supported by the base adaptor; always raises.

        Raises:
            NotImplementedError: always.
        """
        msg = f"Writing to CSV is not supported for {type(self._d)}"
        raise NotImplementedError(msg)

    def write_parquet(self, file: str) -> None:
        """Not supported by the base adaptor; always raises.

        Raises:
            NotImplementedError: always.
        """
        msg = f"Writing to Parquet is not supported for {type(self._d)}"
        raise NotImplementedError(msg)

    def write_feather(self, file: str) -> None:
        """Not supported by the base adaptor; always raises.

        Raises:
            NotImplementedError: always.
        """
        msg = f"Writing to Feather is not supported for {type(self._d)}"
        raise NotImplementedError(msg)

    @property
    def data_preview(self) -> str:
        """Return the preview payload as a JSON string (an empty object here)."""
        # note that the R library uses jsonlite::toJSON
        # TODO(compat): set display none in index.html
        return json.dumps({})

    def default_title(self, name: str) -> str:
        """Return the default title for a pin called *name*."""
        # TODO(compat): title says CSV rather than data.frame
        # see https://github.com/machow/pins-python/issues/5
        return f"{name}: a pinned {self._obj_name}"

    @property
    def _obj_name(self) -> str:
        """Short description of the wrapped object, used in the default title."""
        return f"{type(self._d).__qualname__} object"
|
||
|
||
class DFAdaptor(Adaptor):
    """Adaptor for dataframe-like objects.

    Concrete subclasses supply ``columns``, ``shape`` and ``head``; this class
    builds the data preview and the object description on top of them.

    NOTE(review): this class does not use ``ABCMeta``, so the
    ``@abstractmethod`` markers below are not enforced at instantiation time.
    """

    # Per-instance attribute assigned in Adaptor.__init__; this annotation only
    # narrows its type (it is not a class-level variable, so no ClassVar).
    _d: DataFrame

    def __init__(self, data: DataFrame) -> None:
        """Store the dataframe *data* as the wrapped object."""
        super().__init__(data)

    @property
    def df_type(self) -> str:
        """Label for the dataframe kind, used in the object description."""
        # Consider over-riding this for specialized dataframes
        return "DataFrame"

    @property
    @abstractmethod
    def columns(self) -> list[Any]: ...

    @property
    @abstractmethod
    def shape(self) -> tuple[int, int]: ...

    @abstractmethod
    def head(self, n: int) -> DFAdaptor: ...

    @property
    def data_preview(self) -> str:
        """Return a JSON string with the first 100 rows and per-column metadata.

        The result has two keys: ``"data"`` (row dicts with ``None`` values
        dropped) and ``"columns"`` (one display-metadata dict per column).
        """
        # TODO(compat) is 100 hard-coded?
        # Note that we go df -> json -> dict, to take advantage of type conversions in the dataframe library
        data: list[dict[Any, Any]] = json.loads(self.head(100).to_json())
        columns = [
            {"name": [col], "label": [col], "align": ["left"], "type": [""]}
            for col in self.columns
        ]

        # this reproduces R pins behavior, by omitting entries that would be null
        data_no_nulls = [{k: v for k, v in row.items() if v is not None} for row in data]

        return json.dumps({"data": data_no_nulls, "columns": columns})

    @property
    def _obj_name(self) -> str:
        """Describe the wrapped dataframe as ``"<rows> x <cols> <df_type>"``."""
        row, col = self.shape
        return f"{row} x {col} {self.df_type}"
|
||
|
||
class PandasAdaptor(DFAdaptor):
    """Dataframe adaptor that delegates to the wrapped pandas DataFrame."""

    # Per-instance attribute assigned in Adaptor.__init__; this annotation only
    # narrows its type (it is not a class-level variable, so no ClassVar).
    _d: PandasDataFrame

    def __init__(self, data: AbstractPandasFrame) -> None:
        """Store the pandas DataFrame *data* as the wrapped object."""
        super().__init__(data)

    @property
    def columns(self) -> list[Any]:
        """Column labels of the wrapped frame, as a plain list."""
        return self._d.columns.tolist()

    @property
    def shape(self) -> tuple[int, int]:
        """The wrapped frame's ``shape`` attribute."""
        return self._d.shape

    def head(self, n: int) -> PandasAdaptor:
        """Return a new adaptor wrapping ``head(n)`` of the wrapped frame."""
        return PandasAdaptor(self._d.head(n))

    def to_json(self) -> str:
        """Serialize the frame to JSON with ``orient="records"``."""
        return self._d.to_json(orient="records")

    def write_csv(self, file: str) -> None:
        """Write the frame to *file* as CSV, without the index."""
        self._d.to_csv(file, index=False)

    def write_parquet(self, file: str) -> None:
        """Write the frame to *file* via ``DataFrame.to_parquet``."""
        self._d.to_parquet(file)

    def write_feather(self, file: str) -> None:
        """Write the frame to *file* via ``DataFrame.to_feather``."""
        self._d.to_feather(file)
|
||
|
||
@overload
def create_adaptor(obj: DataFrame) -> DFAdaptor: ...
@overload
def create_adaptor(obj: Any) -> Adaptor: ...
def create_adaptor(obj: Any | DataFrame) -> Adaptor | DFAdaptor:
    """Return an adaptor for *obj*.

    Pandas DataFrames are wrapped in a ``PandasAdaptor``; an object that is
    already an ``Adaptor`` is returned unchanged; anything else is wrapped in
    the generic ``Adaptor``.
    """
    # Dataframes get the specialised adaptor.
    if isinstance(obj, AbstractPandasFrame):
        return PandasAdaptor(obj)

    # Avoid double-wrapping objects that are adaptors already.
    return obj if isinstance(obj, Adaptor) else Adaptor(obj)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
Uh oh!
There was an error while loading. Please reload this page.