Skip to content

Commit

Permalink
Added option to control strictness of selection from volcano-like plo…
Browse files Browse the repository at this point in the history
…ts in CombObj/DiffCombObj (#31)
  • Loading branch information
msbentsen committed Dec 9, 2021
1 parent b024646 commit 7212068
Show file tree
Hide file tree
Showing 3 changed files with 2,882 additions and 765 deletions.
3,543 changes: 2,798 additions & 745 deletions examples/Differential_analysis.ipynb

Large diffs are not rendered by default.

84 changes: 67 additions & 17 deletions examples/Select_rules.ipynb

Large diffs are not rendered by default.

20 changes: 17 additions & 3 deletions tfcomb/objects.py
Original file line number Diff line number Diff line change
Expand Up @@ -1141,7 +1141,9 @@ def select_top_rules(self, n):
def select_significant_rules(self, x="cosine",
y="zscore",
x_threshold=None,
x_threshold_percent=0.05,
y_threshold=None,
y_threshold_percent=0.05,
plot=True,
**kwargs):
"""
Expand All @@ -1155,8 +1157,12 @@ def select_significant_rules(self, x="cosine",
The name of the column within .rules containing the pvalue to be selected on. Default: 'zscore'
x_threshold : float, optional
A minimum threshold for the measure to be selected. If None, the threshold will be estimated from the data. Default: None.
x_threshold_percent : float between 0-1, optional
If x_threshold is not given, x_threshold_percent controls the strictness of the automatic threshold selection. Default: 0.05.
y_threshold : float, optional
A p-value threshold for selecting rules. If None, the threshold will be estimated from the data. Default: None.
y_threshold_percent : float between 0-1, optional
If y_threshold is not given, y_threshold_percent controls the strictness of the automatic threshold selection. Default: 0.05.
plot : bool, optional
Whether to show the 'measure vs. pvalue'-plot or not. Default: True.
kwargs : arguments
Expand All @@ -1178,6 +1184,8 @@ def select_significant_rules(self, x="cosine",
check_value(x_threshold)
if y_threshold is not None:
check_value(y_threshold)
tfcomb.utils.check_value(y_threshold_percent, vmin=0, vmax=1, name="y_threshold_percent")
tfcomb.utils.check_value(x_threshold_percent, vmin=0, vmax=1, name="x_threshold_percent")

#Check if measures are in columns
if x not in self.rules.columns:
Expand All @@ -1186,11 +1194,11 @@ def select_significant_rules(self, x="cosine",
#If x_threshold is None; try to calculate optimal threshold via knee-plot
if x_threshold is None:
self.logger.info("x_threshold is None; trying to calculate optimal threshold")
x_threshold = tfcomb.utils.get_threshold(self.rules[x])
x_threshold = tfcomb.utils.get_threshold(self.rules[x], percent=x_threshold_percent)

if y_threshold is None:
self.logger.info("y_threshold is None; trying to calculate optimal threshold")
y_threshold = tfcomb.utils.get_threshold(self.rules[y])
y_threshold = tfcomb.utils.get_threshold(self.rules[y], percent=y_threshold_percent)

#Set threshold on table
selected = self.rules.copy()
Expand Down Expand Up @@ -1845,6 +1853,7 @@ def calculate_foldchanges(self, pseudo=None, threads=1):
def select_rules(self, contrast=None,
measure="cosine",
measure_threshold=None,
measure_threshold_percent=0.05,
pvalue_threshold=0.05,
plot = True,
pseudocount = 10**-10,
Expand All @@ -1860,6 +1869,8 @@ def select_rules(self, contrast=None,
The measure to use for selecting rules. Default: "cosine" (internally converted to <prefix1>/<prefix2>_<measure>_log2fc).
measure_threshold : tuple, optional
Threshold for 'measure' for selecting rules. Default: None (the threshold is estimated automatically)
measure_threshold_percent : float between 0-1, optional
If measure_threshold is not set, measure_threshold_percent controls the strictness of the automatic threshold. If you increase this value, more differential rules will be found and vice versa. Default: 0.05.
pvalue_threshold : float, optional
The p-value threshold for selecting rules. Default: 0.05.
plot : boolean, optional
Expand All @@ -1877,6 +1888,9 @@ def select_rules(self, contrast=None,
tfcomb.plotting.volcano
"""

#Check input thresholds (each is passed with bounds vmin=0, vmax=1); 'name' labels the value being checked
tfcomb.utils.check_value(measure_threshold_percent, vmin=0, vmax=1, name="measure_threshold_percent")
tfcomb.utils.check_value(pvalue_threshold, vmin=0, vmax=1, name="pvalue_threshold")

#Identify measure to use based on contrast
if contrast == None:
contrast = self.contrasts[0]
Expand All @@ -1899,7 +1913,7 @@ def select_rules(self, contrast=None,
#Find optimal measure threshold
if measure_threshold is None:
self.logger.info("measure_threshold is None; trying to calculate optimal threshold")
measure_threshold = tfcomb.utils.get_threshold(self.rules[measure_col], "both")
measure_threshold = tfcomb.utils.get_threshold(self.rules[measure_col], "both", percent=measure_threshold_percent)

if plot == True:
cp = self.rules.copy() #ensures that -log10 col is not added to self.rules
Expand Down

0 comments on commit 7212068

Please sign in to comment.