diff --git a/analysis_replication/README.md b/analysis_replication/README.md
index 487296a1c..d7123b46e 100644
--- a/analysis_replication/README.md
+++ b/analysis_replication/README.md
@@ -1,19 +1,24 @@
 
 # Reproducible analyses
 
-The scripts in this directory reproduce the analyses of the curated datasets. They were written as a record of usage of the dashboard web application, which provides the same results.
+The scripts in this directory reproduce the analyses of the curated datasets, in the order they are mentioned in the article.
 
-You can run them on the public demo API,
+These scripts were written as a record of usage of the dashboard web application, which provides the same results.
+
+You can run them on the public demo API:
 
 ```sh
-python reproducible_analyses/melanoma_il2.py oncopathtk.org/api
+python run_all.py http://oncopathtk.org/api
 ```
 
-or from your own local instance of the application,
+Or you can run them from your own local instance of the application:
 
 ```sh
-python reproducible_analyses/melanoma_il2.py "127.0.0.1:8080"
+python run_all.py "http://127.0.0.1:8080"
 ```
 
 substituting the argument with the address of your local API server. (See *Setting up a local application instance*).
 
+- These scripts just call the web API, and so they do not require Python package `spatialprofilingtoolbox`.
+- You can alternatively store the API host in `api_host.txt` and omit the command-line argument above.
+- The run result is here in [results.txt](results.txt).
diff --git a/analysis_replication/accessors.py b/analysis_replication/accessors.py
index db1141d78..ad0250894 100644
--- a/analysis_replication/accessors.py
+++ b/analysis_replication/accessors.py
@@ -1,21 +1,59 @@
 """Convenience caller of HTTP methods for data access."""
-import sys
-
+from typing import cast
 import re
 from itertools import chain
 from urllib.request import urlopen
 from urllib.parse import urlencode
 from urllib.error import HTTPError
 import json
+from os.path import exists
+from time import sleep
 
 from pandas import DataFrame
 from pandas import concat
 from numpy import inf
 from numpy import nan
+from numpy import isnan
+from numpy import mean
+from numpy import log
+from scipy.stats import ttest_ind  # type: ignore
+
+
+def get_default_host(given: str | None) -> str | None:
+    if given is not None:
+        return given
+    filename = 'api_host.txt'
+    if exists(filename):
+        with open(filename, 'rt', encoding='utf-8') as file:
+            host = file.read().rstrip()
+    else:
+        host = None
+    return host
+
+
+class Colors:
+    bold_green = '\u001b[32;1m'
+    blue = '\u001b[34m'
+    bold_magenta = '\u001b[35;1m'
+    bold_red = '\u001b[31;1m'
+    yellow = '\u001b[33m'
+    reset = '\u001b[0m'
+
+
+def sleep_poll():
+    seconds = 10
+    print(f'Waiting {seconds} seconds to poll.')
+    sleep(seconds)
+
 
 class DataAccessor:
     """Convenience caller of HTTP methods for data access."""
-    def __init__(self, study, host='data.oncopathtk.org'):
+
+    def __init__(self, study, host=None):
+        _host = get_default_host(host)
+        if _host is None:
+            raise RuntimeError('Expected host name in api_host.txt .')
+        host = _host
         use_http = False
         if re.search('^http://', host):
             use_http = True
@@ -23,6 +61,7 @@ def __init__(self, study, host='data.oncopathtk.org'):
         self.host = host
         self.study = study
         self.use_http = use_http
+        print('\n' + Colors.bold_magenta + study + Colors.reset + '\n')
         self.cohorts = self._retrieve_cohorts()
         self.all_cells = self._retrieve_all_cells_counts()
 
@@ -30,15 +69,21 @@ def counts(self, phenotype_names):
         if isinstance(phenotype_names, str):
             phenotype_names = [phenotype_names]
         conjunction_criteria = self._conjunction_phenotype_criteria(phenotype_names)
-        all_name = ' and '.join([self._name_phenotype(p) for p in phenotype_names])
+        all_name = self.name_for_all_phenotypes(phenotype_names)
         conjunction_counts_series = self._get_counts_series(conjunction_criteria, all_name)
         individual_counts_series = [
             self._get_counts_series(self._phenotype_criteria(name), self._name_phenotype(name))
             for name in phenotype_names
         ]
-        df = concat([self.cohorts, self.all_cells, conjunction_counts_series, *individual_counts_series], axis=1)
+        df = concat(
+            [self.cohorts, self.all_cells, conjunction_counts_series, *individual_counts_series],
+            axis=1,
+        )
         df.replace([inf, -inf], nan, inplace=True)
         return df
+    
+    def name_for_all_phenotypes(self, phenotype_names):
+        return ' and '.join([self._name_phenotype(p) for p in phenotype_names])
 
     def neighborhood_enrichment(self, phenotype_names):
         feature_class = 'neighborhood enrichment'
@@ -70,12 +115,13 @@ def _one_phenotype_spatial_metric(self, phenotype_names, feature_class):
         parts = parts1 + [('study', self.study), ('feature_class', feature_class)]
         query = urlencode(parts)
         endpoint = 'request-spatial-metrics-computation-custom-phenotype'
-        response, url = self._retrieve(endpoint, query)
-        if response['is_pending'] is True:
-            print('Computation is pending. Try again soon!')
-            print('URL:')
-            print(url)
-            sys.exit()
+
+        while True:
+            response, url = self._retrieve(endpoint, query)
+            if response['is_pending'] is True:
+                sleep_poll()
+            else:
+                break
 
         rows = [
             {'sample': key, '%s, %s' % (feature_class, ' and '.join(names)): value}
@@ -109,12 +155,13 @@ def _two_phenotype_spatial_metric(self, phenotype_names, feature_class):
             parts.append(('radius', '100'))
         query = urlencode(parts)
         endpoint = 'request-spatial-metrics-computation-custom-phenotypes'
-        response, url = self._retrieve(endpoint, query)
-        if response['is_pending'] is True:
-            print('Computation is pending. Try again soon!')
-            print('URL:')
-            print(url)
-            sys.exit()
+
+        while True:
+            response, url = self._retrieve(endpoint, query)
+            if response['is_pending'] is True:
+                sleep_poll()
+            else:
+                break
 
         rows = [
             {'sample': key, '%s, %s' % (feature_class, ' and '.join(names)): value}
@@ -210,3 +257,103 @@ def _name_phenotype(self, phenotype):
                 for keyword, sign in zip(['positive', 'negative'], ['+', '-'])
             ]).rstrip()
         return str(phenotype)
+
+    def important(self, phenotype_names: str | list[str]) -> dict[str, float]:
+        if isinstance(phenotype_names, str):
+            phenotype_names = [phenotype_names]
+        conjunction_criteria = self._conjunction_phenotype_criteria(phenotype_names)
+        parts = list(chain(*[
+            [(f'{keyword}_marker', channel) for channel in argument]
+            for keyword, argument in zip(
+                ['positive', 'negative'], [
+                    conjunction_criteria['positive_markers'],
+                    conjunction_criteria['negative_markers'],
+                ])
+        ]))
+        parts = sorted(list(set(parts)))
+        parts.append(('study', self.study))
+        query = urlencode(parts)
+        phenotype_counts, _ = self._retrieve('cggnn-importance-composition', query)
+        return {c['specimen']: c['percentage'] for c in phenotype_counts['counts']}
+
+
+class ExpectedQuantitativeValueError(ValueError):
+    """
+    Raised when an expected quantitative result is significantly different from the expected value.
+    """
+
+    def __init__(self, expected: float, actual: float):
+        error_percent = self.error_percent(expected, actual)
+        if error_percent is not None:
+            error_percent = round(100 * error_percent) / 100
+        message = f'''
+        Expected {expected} but got {Colors.bold_red}{actual}{Colors.reset}. Error is {error_percent}%.
+        '''
+        super().__init__(message)
+
+    @staticmethod
+    def is_error(expected: float, actual: float) -> bool:
+        error_percent = ExpectedQuantitativeValueError.error_percent(expected, actual)
+        if error_percent is None:
+            return True
+        if error_percent < 1.0:
+            return False
+        return True
+
+    @staticmethod
+    def error_percent(expected: float, actual: float) -> float | None:
+        if actual != 0:
+            error_percent = abs(100 * (1 - (actual / expected)))
+        else:
+            error_percent = None
+        return error_percent
+
+
+def handle_expected_actual(expected: float, actual: float | None):
+    _actual = cast(float, actual)
+    if ExpectedQuantitativeValueError.is_error(expected, _actual):
+        raise ExpectedQuantitativeValueError(expected, _actual)
+    string = str(_actual)
+    padded = string + ' '*(21 - len(string))
+    print(Colors.bold_green + padded + Colors.reset, end='')
+
+
+def univariate_pair_compare(
+        list1,
+        list2,
+        expected_fold=None,
+        do_log_fold: bool = False,
+        show_pvalue=False,
+):
+    list1 = list(filter(lambda element: not isnan(element), list1.values))
+    list2 = list(filter(lambda element: not isnan(element), list2.values))
+
+    mean1 = float(mean(list1))
+    mean2 = float(mean(list2))
+    actual = mean2 / mean1
+    if expected_fold is not None:
+        handle_expected_actual(expected_fold, actual)
+    print((mean2, mean1, actual), end='')
+
+    if do_log_fold:
+        _list1 = [log(e) for e in list(filter(lambda element: element != 0, list1))]
+        _list2 = [log(e) for e in list(filter(lambda element: element != 0, list2))]
+        _mean1 = float(mean(_list1))
+        _mean2 = float(mean(_list2))
+        log_fold = _mean2 / _mean1
+        print('  log fold: ' + Colors.yellow + str(log_fold) + Colors.reset, end='')
+
+    if show_pvalue:
+        if do_log_fold:
+            result = ttest_ind(_list1, _list2)
+            print(
+                '  p-value (after log): ' + Colors.blue + str(result.pvalue) + Colors.reset, end=''
+            )
+        else:
+            result = ttest_ind(list1, list2)
+            print('  p-value: ' + Colors.blue + str(result.pvalue) + Colors.reset, end='')
+
+    print('')
+
+def print_comparison() -> None:
+    pass
diff --git a/analysis_replication/breast_imc.py b/analysis_replication/breast_imc.py
index fc0d563ac..32aefe1bb 100644
--- a/analysis_replication/breast_imc.py
+++ b/analysis_replication/breast_imc.py
@@ -1,57 +1,64 @@
 """Data analysis script for one dataset."""
-
 import sys
 
 from numpy import mean
 from numpy import inf
 
 from accessors import DataAccessor
+from accessors import get_default_host
+from accessors import univariate_pair_compare as compare
+
+def test(host):
+    study = 'Breast cancer IMC'
+    access = DataAccessor(study, host=host)
+
+    KRT = {
+    '5': {'positive_markers': ['KRT5', 'CK'], 'negative_markers': []},
+    '7': {'positive_markers': ['KRT7', 'CK'], 'negative_markers': []},
+    '14': {'positive_markers': ['KRT14', 'CK'], 'negative_markers': []},
+    '19': {'positive_markers': ['KRT19', 'CK'], 'negative_markers': []},
+    }
+
+    df = access.proximity([KRT['14'], KRT['7']])
+    values1 = df[df['cohort'] == '1']['proximity, KRT14+ CK+ and KRT7+ CK+']
+    values2 = df[df['cohort'] == '2']['proximity, KRT14+ CK+ and KRT7+ CK+']
+    # handle_expected_actual(1.6216, mean2 / mean1)
+    # # handle_expected_actual(1.69, mean2 / mean1)
+    compare(values1, values2, expected_fold=1.6216, show_pvalue=True)
+
+    df = access.proximity([KRT['14'], KRT['5']])
+    values2 = df[df['cohort'] == '2']['proximity, KRT14+ CK+ and KRT5+ CK+']
+    values3 = df[df['cohort'] == '3']['proximity, KRT14+ CK+ and KRT5+ CK+']
+    # # handle_expected_actual(0.65, mean2 / mean3)
+    # handle_expected_actual(1.265, mean2 / mean3)
+    compare(values3, values2, expected_fold=1.265)
+
+    df = access.counts([KRT['14'], KRT['7']])
+    fractions = df['KRT14+ CK+ and KRT7+ CK+'] / df['all cells']
+    mean0 = float(mean(fractions))
+    print(mean0)
+
+    df = access.counts([KRT['14'], KRT['5']])
+    fractions = df['KRT14+ CK+ and KRT5+ CK+'] / df['all cells']
+    mean0 = float(mean(fractions))
+    print(mean0)
+
+    df = access.counts([KRT['14'], KRT['19']])
+    fractions = df['KRT14+ CK+'] / df['KRT19+ CK+']
+    fractions = fractions[fractions != inf]
+    fractions1 = fractions[df['cohort'] == '1']
+    fractions2 = fractions[df['cohort'] == '2']
+    fractions3 = fractions[df['cohort'] == '3']
+    compare(fractions3, fractions2, expected_fold=111.32, show_pvalue=True)
+    compare(fractions1, fractions2, expected_fold=11.39, show_pvalue=True)
+
 
-study = 'Breast cancer IMC'
-if len(sys.argv) == 1:
-    access = DataAccessor(study)
-else:
-    access = DataAccessor(study, host=sys.argv[1])
-
-
-KRT = {
-   '5': {'positive_markers': ['KRT5', 'CK'], 'negative_markers': []},
-   '7': {'positive_markers': ['KRT7', 'CK'], 'negative_markers': []},
-   '14': {'positive_markers': ['KRT14', 'CK'], 'negative_markers': []},
-   '19': {'positive_markers': ['KRT19', 'CK'], 'negative_markers': []},
-}
-
-df = access.proximity([KRT['14'], KRT['7']])
-values1 = df[df['cohort'] == '1']['proximity, KRT14+ CK+ and KRT7+ CK+']
-values2 = df[df['cohort'] == '2']['proximity, KRT14+ CK+ and KRT7+ CK+']
-mean1 = mean(values1)
-mean2 = mean(values2)
-print((mean2, mean1, mean2 / mean1))
-
-df = access.proximity([KRT['14'], KRT['5']])
-values2 = df[df['cohort'] == '2']['proximity, KRT14+ CK+ and KRT5+ CK+']
-values3 = df[df['cohort'] == '3']['proximity, KRT14+ CK+ and KRT5+ CK+']
-mean2 = mean(values2)
-mean3 = mean(values3)
-print((mean2, mean3, mean2 / mean3))
-
-df = access.counts([KRT['14'], KRT['7']])
-fractions = df['KRT14+ CK+ and KRT7+ CK+'] / df['all cells']
-print(mean(fractions))
-
-
-df = access.counts([KRT['14'], KRT['5']])
-fractions = df['KRT14+ CK+ and KRT5+ CK+'] / df['all cells']
-print(mean(fractions))
-
-df = access.counts([KRT['14'], KRT['19']])
-fractions = df['KRT14+ CK+'] / df['KRT19+ CK+']
-fractions = fractions[fractions != inf]
-fractions1 = fractions[df['cohort'] == '1']
-fractions2 = fractions[df['cohort'] == '2']
-fractions3 = fractions[df['cohort'] == '3']
-mean1 = mean(fractions1)
-mean2 = mean(fractions2)
-mean3 = mean(fractions3)
-print((mean2, mean3, mean2 / mean3))
-print((mean2, mean1, mean2 / mean1))
+if __name__=='__main__':
+    host: str | None
+    if len(sys.argv) == 2:
+        host = sys.argv[1]
+    else:
+        host = get_default_host(None)
+    if host is None:
+        raise RuntimeError('Could not determine API server.')
+    test(get_default_host(None))
diff --git a/analysis_replication/luad_imc.py b/analysis_replication/luad_imc.py
index d14cf35f3..b4afb558b 100644
--- a/analysis_replication/luad_imc.py
+++ b/analysis_replication/luad_imc.py
@@ -1,57 +1,54 @@
 """Data analysis script for one dataset."""
-
 import sys
 
-from numpy import mean
 from numpy import inf
 
 from accessors import DataAccessor
-
-study = 'LUAD progression'
-if len(sys.argv) == 1:
-    access = DataAccessor(study)
-else:
-    access = DataAccessor(study, host=sys.argv[1])
-
-
-df = access.spatial_autocorrelation('B cell')
-values1 = df[df['cohort'] == '1']['spatial autocorrelation, B cell']
-values2 = df[df['cohort'] == '2']['spatial autocorrelation, B cell']
-mean1 = mean(values1)
-mean2 = mean(values2)
-print((mean2, mean1, mean2 / mean1))
-
-df = access.neighborhood_enrichment(['CD163+ macrophage', 'Regulatory T cell'])
-values1 = df[df['cohort'] == '1']['neighborhood enrichment, CD163+ macrophage and Regulatory T cell']
-values2 = df[df['cohort'] == '2']['neighborhood enrichment, CD163+ macrophage and Regulatory T cell']
-mean1 = mean(values1)
-mean2 = mean(values2)
-print((mean2, mean1, mean2 / mean1))
-
-df = access.neighborhood_enrichment(['CD163+ macrophage', 'Endothelial cell'])
-values1 = df[df['cohort'] == '1']['neighborhood enrichment, CD163+ macrophage and Endothelial cell']
-values2 = df[df['cohort'] == '2']['neighborhood enrichment, CD163+ macrophage and Endothelial cell']
-mean1 = mean(values1)
-mean2 = mean(values2)
-print((mean2, mean1, mean2 / mean1))
-
-klrd1 = {'positive_markers': ['KLRD1'], 'negative_markers': []}
-cd14 = {'positive_markers': ['CD14'], 'negative_markers': []}
-cd14_fcgr3a = {'positive_markers': ['CD14', 'FCGR3A'], 'negative_markers': []}
-
-df = access.counts([klrd1, cd14, cd14_fcgr3a])
-fractions = df['KLRD1+'] / df['CD14+ FCGR3A+']
-fractions = fractions[fractions != inf]
-fractions1 = fractions[df['cohort'] == '1']
-fractions2 = fractions[df['cohort'] == '2']
-mean1 = mean(fractions1)
-mean2 = mean(fractions2)
-print((mean2, mean1, mean2 / mean1))
-
-fractions = df['KLRD1+'] / df['CD14+']
-fractions = fractions[fractions != inf]
-fractions1 = fractions[df['cohort'] == '1']
-fractions2 = fractions[df['cohort'] == '2']
-mean1 = mean(fractions1)
-mean2 = mean(fractions2)
-print((mean2, mean1, mean2 / mean1))
+from accessors import get_default_host
+from accessors import univariate_pair_compare as compare
+
+def test(host):
+    study = 'LUAD progression'
+    access = DataAccessor(study, host=host)
+
+    df = access.spatial_autocorrelation('B cell')
+    values1 = df[df['cohort'] == '1']['spatial autocorrelation, B cell']
+    values2 = df[df['cohort'] == '2']['spatial autocorrelation, B cell']
+    compare(values1, values2, expected_fold=1.571, show_pvalue=True)
+
+    df = access.neighborhood_enrichment(['CD163+ macrophage', 'Regulatory T cell'])
+    values1 = df[df['cohort'] == '1']['neighborhood enrichment, CD163+ macrophage and Regulatory T cell']
+    values2 = df[df['cohort'] == '2']['neighborhood enrichment, CD163+ macrophage and Regulatory T cell']
+    compare(values1, values2, expected_fold=797.46, do_log_fold=True, show_pvalue=True)
+
+    df = access.neighborhood_enrichment(['CD163+ macrophage', 'Endothelial cell'])
+    values1 = df[df['cohort'] == '1']['neighborhood enrichment, CD163+ macrophage and Endothelial cell']
+    values2 = df[df['cohort'] == '2']['neighborhood enrichment, CD163+ macrophage and Endothelial cell']
+    compare(values1, values2, expected_fold=9.858, do_log_fold=True, show_pvalue=True)
+
+    klrd1 = {'positive_markers': ['KLRD1'], 'negative_markers': []}
+    cd14 = {'positive_markers': ['CD14'], 'negative_markers': []}
+    cd14_fcgr3a = {'positive_markers': ['CD14', 'FCGR3A'], 'negative_markers': []}
+
+    df = access.counts([klrd1, cd14, cd14_fcgr3a])
+    fractions = df['KLRD1+'] / df['CD14+ FCGR3A+']
+    fractions = fractions[fractions != inf]
+    fractions1 = fractions[df['cohort'] == '1']
+    fractions2 = fractions[df['cohort'] == '2']
+    compare(fractions1, fractions2, expected_fold=4.56, show_pvalue=True)
+
+    fractions = df['KLRD1+'] / df['CD14+']
+    fractions = fractions[fractions != inf]
+    fractions1 = fractions[df['cohort'] == '1']
+    fractions2 = fractions[df['cohort'] == '2']
+    compare(fractions1, fractions2, expected_fold=3.78, show_pvalue=True)
+
+if __name__=='__main__':
+    host: str | None
+    if len(sys.argv) == 2:
+        host = sys.argv[1]
+    else:
+        host = get_default_host(None)
+    if host is None:
+        raise RuntimeError('Could not determine API server.')
+    test(host)
diff --git a/analysis_replication/melanoma_ici.py b/analysis_replication/melanoma_ici.py
index 7a181de3b..7507cadb9 100644
--- a/analysis_replication/melanoma_ici.py
+++ b/analysis_replication/melanoma_ici.py
@@ -1,51 +1,43 @@
 """Data analysis script for one dataset."""
-
 import sys
 
 from numpy import mean
 
 from accessors import DataAccessor
-
-study = 'Melanoma CyTOF ICI'
-if len(sys.argv) == 1:
-    access = DataAccessor(study)
-else:
-    access = DataAccessor(study, host=sys.argv[1])
-
-antigen_experienced_cytotoxic = {'positive_markers': ['CD8A', 'CD3', 'CD45RO'], 'negative_markers': []}
-
-# The average value of the neighborhood enrichment score for phenotype(s) CD3+ CD45RO+ CD8A+ and
-# Melanoma is 1.39 times higher in cohort 1 than in cohort 2.
-df = access.neighborhood_enrichment([antigen_experienced_cytotoxic, 'Melanoma'])
-print(df)
-values1 = df[df['cohort'] == '1']['neighborhood enrichment, CD8A+ CD3+ CD45RO+ and Melanoma']
-values2 = df[df['cohort'] == '2']['neighborhood enrichment, CD8A+ CD3+ CD45RO+ and Melanoma']
-mean1 = mean(values1)
-mean2 = mean(values2)
-print((mean1, mean2, mean1 / mean2))
-
-print('')
-
-# The average value of the co-occurrence score for phenotype(s) CD3+ CD45RO+ CD8A+ and Melanoma
-# is 1.13 times higher in cohort 1 than in cohort 2.
-df = access.co_occurrence([antigen_experienced_cytotoxic, 'Melanoma'])
-print(df)
-values1 = df[df['cohort'] == '1']['co-occurrence, CD8A+ CD3+ CD45RO+ and Melanoma']
-values2 = df[df['cohort'] == '2']['co-occurrence, CD8A+ CD3+ CD45RO+ and Melanoma']
-mean1 = mean(values1)
-mean2 = mean(values2)
-print((mean1, mean2, mean1 / mean2))
-
-print('')
-
-# On average, the fraction of cells that are CD8A+ CD3+ CD45RO+ and MKI67+ is 1.41 times higher
-# in cohort 1 than in cohort 2.
-proliferative = {'positive_markers': ['MKI67'], 'negative_markers': []}
-df = access.counts([antigen_experienced_cytotoxic, proliferative])
-print(df)
-fractions = df['CD8A+ CD3+ CD45RO+ and MKI67+'] / df['all cells']
-fractions1 = fractions[df['cohort'] == '1']
-fractions2 = fractions[df['cohort'] == '2']
-mean1 = mean(fractions1)
-mean2 = mean(fractions2)
-print((mean1, mean2, mean1 / mean2))
+from accessors import get_default_host
+from accessors import univariate_pair_compare as compare
+
+def test(host):
+    study = 'Melanoma CyTOF ICI'
+    access = DataAccessor(study, host=host)
+
+    antigen_experienced_cytotoxic = {'positive_markers': ['CD8A', 'CD3', 'CD45RO'], 'negative_markers': []}
+
+    # The average value of the neighborhood enrichment score for phenotype(s) CD3+ CD45RO+ CD8A+ and
+    # Melanoma is 1.39 times higher in cohort 1 than in cohort 2.
+    df = access.neighborhood_enrichment([antigen_experienced_cytotoxic, 'Melanoma'])
+    values1 = df[df['cohort'] == '1']['neighborhood enrichment, CD8A+ CD3+ CD45RO+ and Melanoma']
+    values2 = df[df['cohort'] == '2']['neighborhood enrichment, CD8A+ CD3+ CD45RO+ and Melanoma']
+    # handle_expected_actual(1.234, mean1 / mean2)
+    # # handle_expected_actual(1.39, mean1 / mean2)
+    compare(values2, values1, expected_fold=1.234, do_log_fold=True)
+
+    # On average, the fraction of cells that are CD8A+ CD3+ CD45RO+ and MKI67+ is 1.41 times higher
+    # in cohort 1 than in cohort 2.
+    proliferative = {'positive_markers': ['MKI67'], 'negative_markers': []}
+    df = access.counts([antigen_experienced_cytotoxic, proliferative])
+    fractions = df['CD8A+ CD3+ CD45RO+ and MKI67+'] / df['all cells']
+    fractions1 = fractions[df['cohort'] == '1']
+    fractions2 = fractions[df['cohort'] == '2']
+    compare(fractions2, fractions1, expected_fold=1.41)
+
+
+if __name__=='__main__':
+    host: str | None
+    if len(sys.argv) == 2:
+        host = sys.argv[1]
+    else:
+        host = get_default_host(None)
+    if host is None:
+        raise RuntimeError('Could not determine API server.')
+    test(host)
diff --git a/analysis_replication/melanoma_il2.py b/analysis_replication/melanoma_il2.py
index 1122fc7d6..2fc75a363 100644
--- a/analysis_replication/melanoma_il2.py
+++ b/analysis_replication/melanoma_il2.py
@@ -1,71 +1,96 @@
-"""Data analysis script for one dataset."""
+"""Data analysis script for one the "Melanoma intralesional IL2" study."""
 
 import sys
+from pandas import DataFrame, Series
 
-from numpy import mean
+from scipy.stats import fisher_exact
 
 from accessors import DataAccessor
+from accessors import get_default_host
+from accessors import univariate_pair_compare as compare
+from accessors import handle_expected_actual
 
-study = 'Melanoma intralesional IL2'
-if len(sys.argv) == 1:
-    access = DataAccessor(study)
-else:
-    host = sys.argv[1]
+
+def test(host):
+    study = 'Melanoma intralesional IL2'
     access = DataAccessor(study, host=host)
 
-exhausted = {'positive_markers': ['KI67', 'PD1', 'LAG3', 'TIM3'], 'negative_markers': []}
-df = access.counts(['CD8+ T cell', exhausted])
-print(df)
+    exhausted = {'positive_markers': ['KI67', 'PD1', 'LAG3', 'TIM3'], 'negative_markers': []}
+    df = access.counts(['CD8+ T cell', exhausted])
+
+    # On average, the fraction of cells that are CD8+ T cells and KI67+ LAG3+ PD1+ TIM3+ is 24.51
+    # times higher in cohort 3 than in cohort 1.
+    fractions = df['CD8+ T cell and KI67+ PD1+ LAG3+ TIM3+'] / df['all cells']
+    fractions1 = fractions[df['cohort'] == '1']
+    fractions3 = fractions[df['cohort'] == '3']
+    compare(fractions1, fractions3, expected_fold=24.51, show_pvalue=True)
+
+    # On average, the ratio of the number of cells that are CD8+ T cells and KI67+ LAG3+ PD1+ TIM3+
+    # to those that are CD8+ T cells is 6.29 times higher in cohort 3 than in cohort 1.
+    fractions = df['CD8+ T cell and KI67+ PD1+ LAG3+ TIM3+'] / df['CD8+ T cell']
+    fractions1 = fractions[df['cohort'] == '1']
+    fractions3 = fractions[df['cohort'] == '3']
+    compare(fractions1, fractions3, expected_fold=6.29, show_pvalue=True)
 
-# On average, the fraction of cells that are CD8+ T cells and KI67+ LAG3+ PD1+ TIM3+ is 24.51
-# times higher in cohort 3 than in cohort 1.
-fractions = df['CD8+ T cell and KI67+ PD1+ LAG3+ TIM3+'] / df['all cells']
-fractions1 = fractions[df['cohort'] == '1']
-fractions3 = fractions[df['cohort'] == '3']
-mean1 = mean(fractions1)
-mean3 = mean(fractions3)
-print((mean3, mean1, mean3 / mean1))
+    mhci = {'positive_markers': ['MHCI'], 'negative_markers': []}
+    df = access.counts(['Tumor', mhci])
 
-# On average, the ratio of the number of cells that are CD8+ T cells and KI67+ LAG3+ PD1+ TIM3+
-# to those that are CD8+ T cells is 6.29 times higher in cohort 3 than in cohort 1.
-fractions = df['CD8+ T cell and KI67+ PD1+ LAG3+ TIM3+'] / df['CD8+ T cell']
-fractions1 = fractions[df['cohort'] == '1']
-fractions3 = fractions[df['cohort'] == '3']
-mean1 = mean(fractions1)
-mean3 = mean(fractions3)
-print((mean3, mean1, mean3 / mean1))
+    # On average, the ratio of the number of cells that are MHCI+ and Tumor to those that are Tumor
+    # is 1.86 times higher in cohort 3 than in cohort 1.
+    fractions = df['Tumor and MHCI+'] / df['Tumor']
+    fractions1 = fractions[df['cohort'] == '1']
+    fractions3 = fractions[df['cohort'] == '3']
+    compare(fractions1, fractions3, expected_fold=1.86)
 
-print('')
+    # The average value of the proximity score for phenotype(s) B cells is 3.59 times higher in
+    # cohort 3 than in cohort 1.
+    df = access.proximity(['B cell', 'B cell'])
+    values1 = df[df['cohort'] == '1']['proximity, B cell and B cell']
+    values3 = df[df['cohort'] == '3']['proximity, B cell and B cell']
+    compare(values1, values3, expected_fold=3.59)
 
-mhci = {'positive_markers': ['MHCI'], 'negative_markers': []}
-df = access.counts(['Tumor', mhci])
-print(df)
+    # The average value of the neighborhood enrichment score for phenotype(s) B cells is 80.45 times
+    # higher in cohort 1 than in cohort 3.
+    df = access.neighborhood_enrichment(['B cell', 'B cell'])
+    values1 = df[df['cohort'] == '1']['neighborhood enrichment, B cell and B cell']
+    values3 = df[df['cohort'] == '3']['neighborhood enrichment, B cell and B cell']
+    compare(values3, values1, expected_fold=80.45, do_log_fold=True)
 
-# On average, the ratio of the number of cells that are MHCI+ and Tumor to those that are Tumor
-# is 1.86 times higher in cohort 3 than in cohort 1.
-fractions = df['Tumor and MHCI+'] / df['Tumor']
-fractions1 = fractions[df['cohort'] == '1']
-fractions3 = fractions[df['cohort'] == '3']
-mean1 = mean(fractions1)
-mean3 = mean(fractions3)
-print((mean3, mean1, mean3 / mean1))
+    for phenotype, expected_baseline, expected_percentage, expected_p in [
+        ('Adipocyte or Langerhans cell', 3.593e-2, 80, 6e-34),
+        ([{'positive_markers': ['S100B'], 'negative_markers': ['SOX10']}], 6.710e-2, 82, 2.783e-21)
+    ]:
+        result_df = DataFrame(columns=['odd ratio', 'p-value'])
+        result_df.index.name = 'specimen'
+        df = access.counts(phenotype)
+        df = df[df['cohort'].isin({'1', '3'})]
+        important_proportion = access.important(phenotype)
+        if (type(phenotype) is list):
+            phenotype = access.name_for_all_phenotypes(phenotype)
+        for specimen, row in df.iterrows():
+            n_cells_of_this_phenotype = row[phenotype][0]
+            n_cells_total = row['all cells']
+            p_important = important_proportion[specimen]
+            odd_ratio, p_value = fisher_exact([
+                [n_cells_of_this_phenotype, p_important],
+                [n_cells_total, 100],
+            ])
+            result_df.loc[specimen] = [odd_ratio, p_value]
+        print(f'\nBaseline presence of {phenotype}')
+        handle_expected_actual(expected_baseline, (df[phenotype].iloc[:, 0]/df['all cells']).mean())
+        print(f'\nPercentage of top 100 most important cells')
+        handle_expected_actual(expected_percentage, Series(important_proportion).mean())
+        print(f'\nLargest p-value from fisher exact test across all specimens')
+        handle_expected_actual(expected_p, result_df['p-value'].max())
+        print('')
 
-# The average value of the proximity score for phenotype(s) B cells is 3.59 times higher in
-# cohort 3 than in cohort 1.
-df = access.proximity(['B cell', 'B cell'])
-print(df)
-values1 = df[df['cohort'] == '1']['proximity, B cell and B cell']
-values3 = df[df['cohort'] == '3']['proximity, B cell and B cell']
-mean1 = mean(values1)
-mean3 = mean(values3)
-print((mean3, mean1, mean3 / mean1))
 
-# The average value of the neighborhood enrichment score for phenotype(s) B cells is 80.45 times
-# higher in cohort 1 than in cohort 3.
-df = access.neighborhood_enrichment(['B cell', 'B cell'])
-print(df)
-values1 = df[df['cohort'] == '1']['neighborhood enrichment, B cell and B cell']
-values3 = df[df['cohort'] == '3']['neighborhood enrichment, B cell and B cell']
-mean1 = mean(values1)
-mean3 = mean(values3)
-print((mean1, mean3, mean1 / mean3))
+if __name__ == '__main__':
+    host: str | None
+    if len(sys.argv) == 2:
+        host = sys.argv[1]
+    else:
+        host = get_default_host(None)
+    if host is None:
+        raise RuntimeError('Could not determine API server.')
+    test(host)
diff --git a/analysis_replication/results.txt b/analysis_replication/results.txt
new file mode 100644
index 000000000..43d3f1e74
--- /dev/null
+++ b/analysis_replication/results.txt
@@ -0,0 +1,29 @@
+Melanoma intralesional IL2
+
+24.511033976415323   (0.010047670356693218, 0.0004099243779907919, 24.511033976415323)  p-value: 0.05666978568568671
+6.294433588209584    (0.047486682117734144, 0.007544234354411779, 6.294433588209584)  p-value: 0.03268081246064705
+1.8632253173760547   (0.7741293927288532, 0.4154781418594317, 1.8632253173760547)
+3.591943278657352    (6.546755795065627, 1.822622265213711, 3.591943278657352)
+80.44603390109388    (0.13545300354857456, 0.0016837747864004108, 80.44603390109388)  log fold: 0.3918886790459368
+
+Melanoma CyTOF ICI
+
+1.2344305898477363   (0.579526623254723, 0.46946878019784494, 1.2344305898477363)  log fold: 1.22996178379338
+1.4110054329515245   (0.05109818481900941, 0.0362140241459757, 1.4110054329515245)
+
+Breast cancer IMC
+
+1.6215579838416214   (53.51380710437757, 33.00147613445089, 1.6215579838416214)  p-value: 0.03558768297763137
+1.2652227187661162   (13.995947353340513, 11.062042394393446, 1.2652227187661162)
+0.008058344647539068
+0.009062759768070526
+111.31691959795391   (5.236077375766945, 0.047037569802310525, 111.31691959795391)  p-value: 0.5115235822483603
+11.39823595697709    (5.236077375766945, 0.4593761171053699, 11.39823595697709)  p-value: 0.4816762958394687
+
+LUAD progression
+
+1.5708529575671681   (0.3472407606181818, 0.22105236454210525, 1.5708529575671681)  p-value: 0.11338628422238624
+797.4567230612785    (0.013456819020965162, 1.687466997495124e-05, 797.4567230612785)  log fold: 0.8493007250260091  p-value (after log): 0.07096354161594853
+9.857655019450066    (0.02504683964315889, 0.002540851713083807, 9.857655019450066)  log fold: 0.8154636349597435  p-value (after log): 0.05431382849024341
+4.5446431417428945   (1.2854687895449681, 0.28285362556585336, 4.5446431417428945)  p-value: 0.06383320899190638
+3.785680226753365    (0.32701237710297304, 0.08638140506215496, 3.785680226753365)  p-value: 0.06481355368930636
diff --git a/analysis_replication/run_all.py b/analysis_replication/run_all.py
new file mode 100644
index 000000000..32bb4eb77
--- /dev/null
+++ b/analysis_replication/run_all.py
@@ -0,0 +1,22 @@
+import sys
+
+from accessors import get_default_host
+
+from melanoma_il2 import test as test1
+from melanoma_ici import test as test2
+from breast_imc import test as test3
+from luad_imc import test as test4
+
+
+if __name__=='__main__':
+    host: str | None
+    if len(sys.argv) == 2:
+        host = sys.argv[1]
+    else:
+        host = get_default_host(None)
+    if host is None:
+        raise RuntimeError('Could not determine API server.')
+    else:
+        print('Using host: ' + str(host))
+    for test in [test1, test2, test3, test4]:
+        test(host)
diff --git a/build/ondemand/Dockerfile b/build/ondemand/Dockerfile
index 03475677e..1f9f16333 100644
--- a/build/ondemand/Dockerfile
+++ b/build/ondemand/Dockerfile
@@ -22,4 +22,4 @@ ENV service_name $service_name
 COPY $WHEEL_FILENAME ./
 RUN python -m pip install "$WHEEL_FILENAME"
 ENV API_SERVER_PORT=8016
-ENTRYPOINT ["spt", "ondemand", "start", "--host", "0.0.0.0", "--port", "8016", "--timeout-seconds", "120"]
+ENTRYPOINT ["spt", "ondemand", "start", "--host", "0.0.0.0", "--port", "8016", "--timeout-seconds", "360"]