Skip to content

Commit

Permalink
Merge pull request #114 from PixelgenTechnologies/feature/EXE-1431-Ad…
Browse files Browse the repository at this point in the history
…d-differential-colocalization-measures

Added the scores from Mann–Whitney test to differential colocalizatio…
  • Loading branch information
ptajvar authored Mar 19, 2024
2 parents 7bb2dc8 + ac818fb commit 99bfc45
Show file tree
Hide file tree
Showing 5 changed files with 201 additions and 17 deletions.
45 changes: 33 additions & 12 deletions src/pixelator/analysis/colocalization/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@

import numpy as np
import pandas as pd
from scipy.stats import mannwhitneyu
from statsmodels.stats.multitest import multipletests

from pixelator.analysis.colocalization.estimate import (
estimate_observation_statistics,
Expand Down Expand Up @@ -289,19 +291,37 @@ def data():


def _colocalization_wilcoxon(
markers, df, reference, target, contrast_column, value_column
markers,
df,
reference,
target,
contrast_column,
value_column,
):
reference_df = df[df[contrast_column] == reference]
target_df = df[df[contrast_column] == target]
reference_df = df.loc[df[contrast_column] == reference, :]
target_df = df.loc[df[contrast_column] == target, :]

if reference_df.empty or target_df.empty:
return {}

estimate = np.median(
target_df[value_column].to_numpy()[:, None]
- reference_df[value_column].to_numpy()
)

stat, p_value = mannwhitneyu(
x=reference_df[value_column],
y=target_df[value_column],
alternative="two-sided",
)

return {
"marker_1": markers[0],
"marker_2": markers[1],
"markers": "/".join(markers),
"estimate": estimate,
"stat": stat,
"p_value": p_value,
"median_difference": estimate,
}


Expand Down Expand Up @@ -331,9 +351,9 @@ def get_differential_colocalization(
same_marker_mask = (
colocalization_data_frame["marker_1"] == colocalization_data_frame["marker_2"]
)
data_frame = colocalization_data_frame[~same_marker_mask]
data_frame = colocalization_data_frame.loc[~same_marker_mask, :]

colocalization_test_results = pd.DataFrame.from_records(
differential_colocalization = pd.DataFrame.from_records(
data_frame.groupby(["marker_1", "marker_2"]).apply(
lambda marker_data: _colocalization_wilcoxon(
marker_data.name,
Expand All @@ -345,18 +365,19 @@ def get_differential_colocalization(
)
)
)
differential_colocalization = colocalization_test_results.rename(
columns={"estimate": value_column}
)

# If a marker appears only in one of the datasets, it's differential value will be NAN
nan_values = differential_colocalization[
differential_colocalization[value_column].isna()
differential_colocalization["median_difference"].isna()
].index
differential_colocalization.drop(
nan_values,
axis="index",
inplace=True,
)

return differential_colocalization[["marker_1", "marker_2", value_column]]
_, pvals_corrected, *_ = multipletests(
differential_colocalization["p_value"], method="bonferroni"
)
differential_colocalization["p_adj"] = pvals_corrected

return differential_colocalization
134 changes: 130 additions & 4 deletions src/pixelator/plot/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -699,7 +699,7 @@ def _get_top_marker_pairs(
value_col: str = "pearson",
) -> list:
colocalization_data["abs_val"] = colocalization_data[value_col].abs()
top_marker_pairs = colocalization_data.nlargest(10, "abs_val")
top_marker_pairs = colocalization_data.nlargest(n_top_marker_pairs, "abs_val")
top_markers = list(
set(top_marker_pairs["marker_1"]).union(set(top_marker_pairs["marker_2"]))
)
Expand Down Expand Up @@ -760,19 +760,19 @@ def plot_colocalization_diff_heatmap(

if n_top_marker_pairs is not None:
top_markers = _get_top_marker_pairs(
differential_colocalization, n_top_marker_pairs, value_col
differential_colocalization, n_top_marker_pairs, "median_difference"
)
else:
top_markers = None

# Making the differential colocalization symmetric
differential_colocalization = _make_colocalization_symmetric(
differential_colocalization, value_col
differential_colocalization, "median_difference"
)

pivoted_differential_colocalization = _pivot_colocalization_data(
differential_colocalization,
value_col,
"median_difference",
markers=top_markers,
)

Expand All @@ -791,6 +791,132 @@ def plot_colocalization_diff_heatmap(
return plt.gcf(), plt.gca()


def _add_top_marker_labels(
differential_colocalization,
ax,
n_top_pairs: int = 5,
min_log_p: float = 5.0,
):
differential_colocalization = differential_colocalization.sort_values(
"median_difference"
)
differential_colocalization = differential_colocalization.loc[
-np.log10(differential_colocalization["p_adj"]) > min_log_p, :
]

## Labels for marker pair withs highest negative differential colocalization scores
for _, row in differential_colocalization.head(n_top_pairs).iterrows():
x, y = row[["median_difference", "p_adj"]]
y = -np.log10(y)
if x > 0:
continue
ax.text(x, y, row["markers"], horizontalalignment="left", fontsize="xx-small")

## Labels for marker pair with highest positive differential colocalization scores
for _, row in differential_colocalization.tail(n_top_pairs).iterrows():
x, y = row[["median_difference", "p_adj"]]
y = -np.log10(y)
if x < 0:
continue
ax.text(x, y, row["markers"], horizontalalignment="left", fontsize="xx-small")


def _add_target_mean_colocalizations(
differential_colocalization,
target_coloc,
value_col,
):
differential_colocalization = differential_colocalization.fillna(0).reset_index()

target_values = (
target_coloc.groupby(["marker_1", "marker_2"])[value_col].mean().reset_index()
)
differential_colocalization = pd.merge(
target_values,
differential_colocalization,
on=["marker_1", "marker_2"],
how="inner",
)

return differential_colocalization


def plot_colocalization_diff_volcano(
colocalization_data: pd.DataFrame,
target: str,
reference: str,
contrast_column: str = "sample",
cmap: str = "vlag",
use_z_score: bool = True,
n_top_pairs: int = 5,
min_log_p: float = 5.0,
ax: Optional[plt.Axes] = None,
) -> Tuple[plt.Figure, plt.Axes]:
"""Generate the volcano plot of differential colocalization between reference and target components.
Example usage: `plot_colocalization_diff_volcano(pxl.colocalization, target:"stimulated", reference:"control", contrast_column="sample")`.
:param colocalization_data: The colocalization data frame that can be found in a pixel variable
"pxl" through pxl.colocalization. The data frame should contain the
columns "marker_1", "marker_2", "pearson", "pearson_z", and the contrast_column.
:param target: The label for target components in the contrast_column.
:param reference: The label for reference components in the contrast_column.
:param contrast_column: The column to use for the contrast. Defaults to "sample".
:param cmap: The colormap to use for the heatmap. Defaults to "vlag".
:param use_z_score: Whether to use the z-score. Defaults to True.
:param n_top_pairs: Number of high value marker-pairs to label from positive and negative sides.
:param min_log_p: marker-pairs only receive a label if -log10 of their p-value is higher than
this parameter.
:return: The figure and axes objects of the plot.
:rtype: Tuple[plt.Figure, plt.Axes]
"""
if use_z_score:
value_col = "pearson_z"
else:
value_col = "pearson"

differential_colocalization = get_differential_colocalization(
colocalization_data,
target=target,
reference=reference,
contrast_column=contrast_column,
use_z_score=use_z_score,
)

differential_colocalization = _add_target_mean_colocalizations(
differential_colocalization,
target_coloc=colocalization_data.loc[
colocalization_data[contrast_column] == target, :
],
value_col=value_col,
)

if ax is None:
fig, ax = plt.subplots()

p = ax.scatter(
x=differential_colocalization["median_difference"],
y=-np.log10(differential_colocalization["p_adj"]),
c=differential_colocalization[value_col],
s=20,
marker="o",
cmap=cmap,
)

ax.set(xlabel="Median difference", ylabel=r"$-\log_{10}$(adj. p-value)")
fig = plt.gcf()
fig.colorbar(p, label="Mean target colocalization score", cmap=cmap)
_add_top_marker_labels(
differential_colocalization,
n_top_pairs=n_top_pairs,
min_log_p=min_log_p,
ax=ax,
)

return fig, ax


@experimental
def plot_3d_heatmap(
component_graph: Graph,
Expand Down
12 changes: 11 additions & 1 deletion tests/analysis/colocalization/test_colocalization.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,17 @@ def test_get_differential_colocalization(setup_basic_pixel_dataset):
use_z_score=False,
)
expected = pd.DataFrame.from_dict(
{0: {"marker_1": "CD19", "marker_2": "CD45", "pearson": 0.1}},
{
0: {
"marker_1": "CD19",
"marker_2": "CD45",
"markers": "CD19/CD45",
"stat": 0.0,
"p_value": 1.0,
"median_difference": 0.09999999999999998,
"p_adj": 1.0,
}
},
orient="index",
)
assert_frame_equal(result, expected, check_exact=False, atol=0.01)
Expand Down
27 changes: 27 additions & 0 deletions tests/plot/test_plot.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
plot_3d_graph,
plot_3d_heatmap,
plot_colocalization_diff_heatmap,
plot_colocalization_diff_volcano,
plot_colocalization_heatmap,
scatter_umi_per_upia_vs_tau,
)
Expand Down Expand Up @@ -110,6 +111,32 @@ def test_plot_colocalization_diff_heatmap(setup_basic_pixel_dataset):
return fig


@pytest.mark.mpl_image_compare(
deterministic=True,
baseline_dir="../snapshots/test_plot/test_plot_colocalization_diff_volcano",
)
def test_plot_colocalization_diff_volcano(setup_basic_pixel_dataset):
np.random.seed(0)
pxl_data, *_ = setup_basic_pixel_dataset
colocalization_data = pxl_data.colocalization
colocalization_data.loc[5] = [
"CD3",
"CD19",
0.5,
"PXLCMP0000002",
] # Adding a new pair of colocalization data as the volcano needs at least 2 rows
colocalization_data.loc[6] = ["CD3", "CD19", 0.7, "PXLCMP0000003"]
fig, _ = plot_colocalization_diff_volcano(
colocalization_data,
target="PXLCMP0000003",
reference="PXLCMP0000002",
contrast_column="component",
use_z_score=False,
min_log_p=-1,
)
return fig


@pytest.mark.mpl_image_compare(
deterministic=False,
baseline_dir="../snapshots/test_plot/test_scatter_umi_per_upia_vs_tau",
Expand Down
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.

0 comments on commit 99bfc45

Please sign in to comment.