Skip to content

Commit

Permalink
Simplify data passed to create presence matrix
Browse files (browse the repository at this point in the history)
  • Loading branch information
ivirshup committed Jan 28, 2025
1 parent 218d49e commit 66224f5
Showing 1 changed file with 4 additions and 9 deletions.
Original file line number | Diff line number | Diff line change
Expand Up @@ -132,7 +132,7 @@ def __init__(self, specification: ExperimentSpecification):
self.experiment: soma.Experiment | None = None # initialized in create()
self.experiment_uri: str | None = None # initialized in create()
self.global_var_joinids: pd.DataFrame | None = None
self.presence: dict[int, tuple[npt.NDArray[np.bool_], npt.NDArray[np.int64]]] = {}
self.presence: dict[int, npt.NDArray[np.int64]] = {}

@property
def name(self) -> str:
Expand Down Expand Up @@ -242,9 +242,8 @@ def populate_presence_matrix(self, datasets: list[Dataset]) -> None:

# LIL is a fast way to create an spmatrix
pm = sparse.lil_matrix((max_dataset_joinid + 1, self.n_var), dtype=bool)
for dataset_joinid, presence in self.presence.items():
data, cols = presence
pm[dataset_joinid, cols] = data # This should always be 1
for dataset_joinid, cols in self.presence.items():
pm[dataset_joinid, cols] = 1

pm = pm.tocoo()
pm.eliminate_zeros()
Expand Down Expand Up @@ -713,11 +712,7 @@ def populate_X_layers(

for presence in eb_summary["presence"]:
assert presence.eb_name == eb.name
eb.presence[presence.dataset_soma_joinid] = (
# If a gene was in the `.var_names` of the h5ad, it counts as present regardless of whether any values were greater than 1
np.broadcast_to(True, presence.cols.shape),
presence.cols,
)
eb.presence[presence.dataset_soma_joinid] = presence.cols


class SummaryStats(TypedDict):
Expand Down

0 comments on commit 66224f5

Please sign in to comment.