diff --git a/CHANGELOG.md b/CHANGELOG.md index a7c5a04a..f74cff7f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Added +* Add `rate_diff_transformation` function with `rate-diff` alias as an alternative option for transforming marker counts before colocalization calculation. * Add `local_g` function to compute spatial autocorrelation of marker counts per node. * Add `compute_transition_probabilities` function to compute transition probabilities for k-step random walks for node pairs in a graph. * Add QC plot showing UMIs per UPIA vs Tau. diff --git a/src/pixelator/analysis/colocalization/__init__.py b/src/pixelator/analysis/colocalization/__init__.py index b25b272f..241427b4 100644 --- a/src/pixelator/analysis/colocalization/__init__.py +++ b/src/pixelator/analysis/colocalization/__init__.py @@ -34,7 +34,11 @@ from pixelator.analysis.permute import permutations from pixelator.graph.utils import Graph from pixelator.pixeldataset import PixelDataset -from pixelator.statistics import correct_pvalues, log1p_transformation +from pixelator.statistics import ( + correct_pvalues, + log1p_transformation, + rate_diff_transformation, +) logger = logging.getLogger(__name__) @@ -91,6 +95,8 @@ def _transform_data( return data if transform == "log1p": return log1p_transformation(data) + if transform == "rate-diff": + return rate_diff_transformation(data) raise ValueError( f"`transform`must be one of: {'/'.join(get_args(TransformationTypes))}" ) diff --git a/src/pixelator/analysis/colocalization/types.py b/src/pixelator/analysis/colocalization/types.py index db64cf7a..6072ed2c 100644 --- a/src/pixelator/analysis/colocalization/types.py +++ b/src/pixelator/analysis/colocalization/types.py @@ -12,7 +12,7 @@ MarkerColocalizationResults = pd.DataFrame -TransformationTypes = Literal["raw", "log1p"] +TransformationTypes = Literal["raw", "log1p", "rate-diff"] @dataclass diff --git a/src/pixelator/statistics.py b/src/pixelator/statistics.py index 2f41fe94..79ef02d3 100644 --- a/src/pixelator/statistics.py +++ b/src/pixelator/statistics.py @@ -131,6 +131,27 @@ def log1p_transformation(df: pd.DataFrame) -> pd.DataFrame: return log1p_df +def rate_diff_transformation(df: pd.DataFrame) -> pd.DataFrame: + """Transform antibody counts as deviation from an expected baseline distribution. + + In this function we refer to baseline distribution as fixed ratio of different + antibody types in each node. For example, if in total 10% of antibodies are + HLA-ABC, in a node with 120 antibodies we expect to see 12 HLA-ABC counts. + If we actually see 8 counts in this node, the rate_diff_transformation for + HLA-ABC in this node will be -4. + + :param df: the dataframe of raw antibody counts (antibodies as columns) + :returns: a dataframe with the counts difference from expected values + :rtype: pd.DataFrame + """ + antibody_counts_per_node = df.sum(axis=1) + antibody_rates = df.sum(axis=0) + antibody_rates = antibody_rates / antibody_rates.sum() + + expected_counts = antibody_counts_per_node.to_frame() @ antibody_rates.to_frame().T + return df - expected_counts + + def rel_normalization(df: pd.DataFrame, axis: Literal[0, 1] = 0) -> pd.DataFrame: """Normalize antibody counts to the relative amount per marker or component. diff --git a/tests/analysis/colocalization/test_colocalization.py b/tests/analysis/colocalization/test_colocalization.py index cc0519cb..e435bf77 100644 --- a/tests/analysis/colocalization/test_colocalization.py +++ b/tests/analysis/colocalization/test_colocalization.py @@ -160,6 +160,46 @@ def test_colocalization_scores_log1p(enable_backend, full_graph_edgelist: pd.Dat assert_frame_equal(result, expected) +@pytest.mark.parametrize("enable_backend", ["networkx"], indirect=True) +def test_colocalization_scores_ratediff( + enable_backend, full_graph_edgelist: pd.DataFrame +): + result = colocalization_scores( + edgelist=full_graph_edgelist, + use_full_bipartite=True, + transformation="rate-diff", + neighbourhood_size=1, + n_permutations=50, + min_region_count=0, + random_seed=1477, + ) + + expected = pd.DataFrame.from_dict( + { + 0: { + "marker_1": "A", + "marker_2": "B", + "pearson": -1.0, + "pearson_mean": -1.0, + "pearson_stdev": 0.0, + "pearson_z": np.nan, + "pearson_p_value": np.nan, + "pearson_p_value_adjusted": np.nan, + "jaccard": 1.0, + "jaccard_mean": 1.0, + "jaccard_stdev": 0.0, + "jaccard_z": np.nan, + "jaccard_p_value": np.nan, + "jaccard_p_value_adjusted": np.nan, + "component": "PXLCMP0000000", + } + }, + orient="index", + ) + + assert_frame_equal(result, expected) + + @pytest.mark.parametrize("enable_backend", ["networkx"], indirect=True) def test_colocalization_scores_should_not_fail_when_one_component_has_single_node( enable_backend, diff --git a/tests/test_statistics.py b/tests/test_statistics.py index ea98c249..79f7bcfd 100644 --- a/tests/test_statistics.py +++ b/tests/test_statistics.py @@ -10,6 +10,7 @@ clr_transformation, correct_pvalues, log1p_transformation, + rate_diff_transformation, rel_normalization, ) @@ -120,6 +121,22 @@ def test_clr_standard_transformation_axis_1(): assert_frame_equal(norm_counts, expected) +def test_rate_diff_transformation(): + antibody_counts = pd.DataFrame( + [[7.0, 3.0, 10.0], [10.0, 2.0, 5.0]], + columns=["A", "B", "C"], + index=["0000000", "0000001"], + ) + + norm_counts = rate_diff_transformation(antibody_counts) + expected = pd.DataFrame( + [[-2.189189, 0.2972973, 1.89189189], [2.189189, -0.2972973, -1.89189189]], + columns=["A", "B", "C"], + index=["0000000", "0000001"], + ) + assert_frame_equal(norm_counts, expected) + + def test_rel_normalization(): antibody_counts = pd.DataFrame( [[7.0, 3.0, 10.0], [10.0, 2.0, 5.0]],