Skip to content

Commit

Permalink
Merge branch 'ivirshup/n_measured_obs_validation' into ivirshup/n_measured_obs
Browse files Browse the repository at this point in the history
  • Loading branch information
ivirshup committed Jan 28, 2025
2 parents 0c71b92 + 40e83ff commit b10afcf
Showing 1 changed file with 17 additions and 0 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -543,6 +543,14 @@ def validate_X_layers_presence(
3. Presence mask per dataset is correct for each dataset
"""

def _read_var_names(path: str) -> npt.NDArray[object]:
    """Return the var index values stored in the h5ad file at ``path``.

    The file is opened directly with h5py and only the ``var`` group is
    touched: the group's ``_index`` attribute names the member that holds the
    index, and that member is decoded with anndata's ``read_elem``.
    """
    import h5py
    from anndata.io import read_elem

    with h5py.File(path) as h5ad:
        var_group = h5ad["var"]
        # The "_index" attribute stores the key of the index column in "var".
        index_column = var_group.attrs["_index"]
        var_names = read_elem(var_group[index_column])
    return var_names

@logit(logger)
def _validate_X_layers_presence_general(experiment_specifications: list[ExperimentSpecification]) -> bool:
for es in experiment_specifications:
Expand Down Expand Up @@ -585,6 +593,9 @@ def _validate_X_layers_presence(
)
if len(obs_df) > 0: # skip empty experiments
X_raw = exp.ms[MEASUREMENT_RNA_NAME].X["raw"]
feature_ids = pd.Index(
exp.ms[MEASUREMENT_RNA_NAME].var.read(column_names=["feature_id"]).concat().to_pandas()
)

presence_accumulator = np.zeros((X_raw.shape[1]), dtype=np.bool_)
for block, _ in (
Expand All @@ -601,6 +612,12 @@ def _validate_X_layers_presence(
.concat()
)

# Get soma_joinids for the features in the original h5ad
orig_feature_ids = _read_var_names(dataset.dataset_h5ad_path)
orig_indices = np.sort(feature_ids.get_indexer(feature_ids.intersection(orig_feature_ids)))

np.testing.assert_array_equal(presence["soma_dim_1"], orig_indices)

assert np.array_equal(presence_accumulator, presence), "Presence value does not match X[raw]"

assert (
Expand Down

0 comments on commit b10afcf

Please sign in to comment.