Skip to content

Commit

Permalink
Merge pull request #79 from PixelgenTechnologies/feature/exe-1353-ref…
Browse files Browse the repository at this point in the history
…actor-pixeldataset-io

Feature/exe 1353 refactor pixeldataset io
  • Loading branch information
johandahlberg authored Feb 7, 2024
2 parents 10f1045 + 54aa624 commit 21eca65
Show file tree
Hide file tree
Showing 11 changed files with 1,247 additions and 853 deletions.
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
* Remove multi-sample processing from all `single-cell` subcommands
* Add `--sample_name` option to `single-cell amplicon` to overwrite the name derived from the input filename.
* Add `--skip-input-checks` option to `single-cell amplicon` to make input filename checks warnings instead of errors.
* Pixeldatasets are now written directly to disk, without creating intermediate files.

### Removed

Expand All @@ -38,6 +39,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

* Bug in README shield formatting


## [0.16.0] - 2024-01-12

This release introduces two major changes in pixelator:
Expand Down
765 changes: 409 additions & 356 deletions poetry.lock

Large diffs are not rendered by default.

6 changes: 3 additions & 3 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -46,13 +46,13 @@ lxml = "*"
cssselect = "*"
typing_extensions = "*"
scipy = "*"
pyarrow = ">=13,<15"
pyarrow = ">=14,<16"
semver = "^3.0.0"
ruamel-yaml = "^0.17.21"
pydantic = "^1.10.7"
polars = "^0.20"
importlib-resources = "^5.12.0"
fsspec = "^2023.9.0"
fsspec = "^2023.12.2"
fastparquet = "^2023.8.0"
graspologic = "^3.3.0"

Expand All @@ -66,7 +66,7 @@ flake8-docstrings = "^1.7.0"
invoke = "*"
isort = "*"
pylint = "*"
pytest = "*"
pytest = "^7.0.0"
sphinx = "*"
tox = "*"
tox-current-env = "^0.0.11"
Expand Down
18 changes: 9 additions & 9 deletions src/pixelator/pixeldataset/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,10 +28,10 @@
FileBasedPixelDatasetBackend,
ObjectBasedPixelDatasetBackend,
)
from pixelator.pixeldataset.file_formats import (
PixelFileFormatSpec,
PixelFileParquetFormatSpec,
PixelFileCSVFormatSpec,
from pixelator.pixeldataset.datastores import (
PixelDataStore,
ZipBasedPixelFileWithParquet,
ZipBasedPixelFileWithCSV,
)
from pixelator.types import PathType

Expand Down Expand Up @@ -296,7 +296,7 @@ def copy(self) -> PixelDataset:
def save(
self,
path: PathType,
file_format: Literal["csv", "parquet"] | PixelFileFormatSpec = "parquet",
file_format: Literal["csv", "parquet"] | PixelDataStore = "parquet",
) -> None:
"""Save the PixelDataset to a .pxl file in the location provided in `path`.
Expand All @@ -310,16 +310,16 @@ def save(
"""
logger.debug("Saving PixelDataset to %s", path)

if isinstance(file_format, PixelFileFormatSpec):
if isinstance(file_format, PixelDataStore):
format_spec = file_format
elif file_format not in ["csv", "parquet"]:
raise AssertionError("`file_format` must be `csv` or `parquet`")
if file_format == "csv":
format_spec = PixelFileCSVFormatSpec()
format_spec = ZipBasedPixelFileWithCSV(path)
if file_format == "parquet":
format_spec = PixelFileParquetFormatSpec()
format_spec = ZipBasedPixelFileWithParquet(path)

format_spec.save(self, path)
format_spec.save(self)

def filter(
self,
Expand Down
30 changes: 13 additions & 17 deletions src/pixelator/pixeldataset/backends.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
import polars as pl
from anndata import AnnData

from pixelator.pixeldataset.file_formats import PixelFileFormatSpec
from pixelator.pixeldataset.datastores import PixelDataStore
from pixelator.pixeldataset.utils import (
_enforce_edgelist_types,
)
Expand Down Expand Up @@ -194,7 +194,9 @@ class FileBasedPixelDatasetBackend:
in memory.
"""

def __init__(self, path: PathType) -> None:
def __init__(
self, path: PathType, datastore: Optional[PixelDataStore] = None
) -> None:
"""Create a filebased backend instance.
Create a backend, fetching information from the .pxl file
Expand All @@ -203,42 +205,36 @@ def __init__(self, path: PathType) -> None:
:param path: Path to the .pxl file
"""
self._path = path
self._file_format = PixelFileFormatSpec.guess_file_format(path)
if not datastore:
datastore = PixelDataStore.guess_datastore_from_path(path)
self._datastore = datastore

@cached_property
def adata(self) -> AnnData:
"""Get the AnnData object for the pixel dataset."""
return self._file_format.deserialize_anndata(self._path)
return self._datastore.read_anndata()

@cached_property
def edgelist(self) -> pd.DataFrame:
"""Get the edge list object for the pixel dataset."""
return self._file_format.deserialize_dataframe(
self._path, self._file_format.EDGELIST_KEY
)
return self._datastore.read_edgelist()

@property
def edgelist_lazy(self) -> Optional[pl.LazyFrame]:
"""Get a lazy frame representation of the edgelist."""
return self._file_format.deserialize_dataframe_lazy(
self._path, self._file_format.EDGELIST_KEY
)
return self._datastore.read_edgelist_lazy()

@cached_property
def polarization(self) -> Optional[pd.DataFrame]:
"""Get the polarization object for the pixel dataset."""
return self._file_format.deserialize_dataframe(
self._path, self._file_format.POLARIZATION_KEY
)
return self._datastore.read_polarization()

@cached_property
def colocalization(self) -> Optional[pd.DataFrame]:
"""Get the colocalization object for the pixel dataset."""
return self._file_format.deserialize_dataframe(
self._path, self._file_format.COLOCALIZATION_KEY
)
return self._datastore.read_colocalization()

@cached_property
def metadata(self) -> Optional[Dict]:
"""Get the metadata object for the pixel dataset."""
return self._file_format.deserialize_metadata(self._path)
return self._datastore.read_metadata()
Loading

0 comments on commit 21eca65

Please sign in to comment.