From 89f1264d1c66dc7186a09187c0156e0c16b3770c Mon Sep 17 00:00:00 2001
From: Cameron Davidson-Pilon <cam.davidson.pilon@gmail.com>
Date: Fri, 25 Jan 2019 09:49:42 -0500
Subject: [PATCH] V0.17.4 (#616)

* v0.17.4

* lint

* bump version

* lint
---
 CHANGELOG.md                      |  4 +++
 docs/conf.py                      |  2 +-
 lifelines/datasets/__init__.py    |  4 +--
 lifelines/fitters/coxph_fitter.py | 42 +++++++++++++++++++++++--------
 lifelines/plotting.py             | 21 +++++++++-------
 lifelines/version.py              |  2 +-
 tests/test_estimation.py          |  4 +--
 tests/test_plotting.py            | 36 +++++++++++++++++++++++++-
 8 files changed, 88 insertions(+), 27 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 3aebf867f..fbc6a95a6 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,5 +1,9 @@
 ### Changelogs
 
+#### 0.17.4
+ - Fix bug in `plot_covariate_groups` that wasn't allowing for strata to be used. 
+ - change name of `multicenter_aids_cohort_study` to `load_multicenter_aids_cohort_study`
+
 #### 0.17.3
  - Fix in `compute_residuals` when using `schoenfeld` and the minumum duration has only censored subjects. 
 
diff --git a/docs/conf.py b/docs/conf.py
index f6b361343..a92ed6e8f 100644
--- a/docs/conf.py
+++ b/docs/conf.py
@@ -60,7 +60,7 @@
 #
 # The short X.Y version.
 
-version = "0.17.3"
+version = "0.17.4"
 # The full version, including dev info
 release = version
 
diff --git a/lifelines/datasets/__init__.py b/lifelines/datasets/__init__.py
index f41f39379..b37e47234 100644
--- a/lifelines/datasets/__init__.py
+++ b/lifelines/datasets/__init__.py
@@ -50,7 +50,7 @@ def load_recur(**kwargs):
     return load_dataset("recur.csv", **kwargs)
 
 
-def multicenter_aids_cohort_study(**kwargs):
+def load_multicenter_aids_cohort_study(**kwargs):
     """
     Originally in [1]
 
@@ -62,7 +62,7 @@ def multicenter_aids_cohort_study(**kwargs):
     D: indicator of death during follow up
 
 
-    i   AIDSY       W      T    D
+    i   AIDSY       W      T        D
     1   1990.425    4.575   7.575   0
     2   1991.250    3.750   6.750   0
     3   1992.014    2.986   5.986   0
diff --git a/lifelines/fitters/coxph_fitter.py b/lifelines/fitters/coxph_fitter.py
index 7eb790cd3..5da2a352d 100644
--- a/lifelines/fitters/coxph_fitter.py
+++ b/lifelines/fitters/coxph_fitter.py
@@ -31,6 +31,7 @@
 from lifelines.utils import (
     _get_index,
     _to_list,
+    _to_array,
     survival_table_from_events,
     inv_normal_cdf,
     normalize,
@@ -1537,7 +1538,7 @@ def plot(self, columns=None, display_significance_code=True, **errorbar_kwargs):
 
         return ax
 
-    def plot_covariate_groups(self, covariate, groups, **kwargs):
+    def plot_covariate_groups(self, covariate, values, **kwargs):
         """
         Produces a visual representation comparing the baseline survival curve of the model versus
         what happens when a covariate is varied over values in a group. This is useful to compare
@@ -1548,14 +1549,14 @@ def plot_covariate_groups(self, covariate, groups, **kwargs):
         ----------
         covariate: string
             a string of the covariate in the original dataset that we wish to vary.
-        groups: iterable
+        values: iterable
             an iterable of the values we wish the covariate to take on.
         kwargs:
             pass in additional plotting commands
 
         Returns
         -------
-        ax: matplotlib axis
+        ax: matplotlib axis, or list of axis'
             the matplotlib axis that be edited.
         """
         from matplotlib import pyplot as plt
@@ -1563,15 +1564,34 @@ def plot_covariate_groups(self, covariate, groups, **kwargs):
         if covariate not in self.hazards_.columns:
             raise KeyError("covariate `%s` is not present in the original dataset" % covariate)
 
-        ax = kwargs.get("ax", None) or plt.figure().add_subplot(111)
-        x_bar = self._norm_mean.to_frame().T
-        X = pd.concat([x_bar] * len(groups))
-        X.index = ["%s=%s" % (covariate, g) for g in groups]
-        X[covariate] = groups
+        if self.strata is None:
+            axes = kwargs.get("ax", None) or plt.figure().add_subplot(111)
+            x_bar = self._norm_mean.to_frame().T
+            X = pd.concat([x_bar] * len(values))
+            X.index = ["%s=%s" % (covariate, g) for g in values]
+            X[covariate] = values
 
-        self.predict_survival_function(X).plot(ax=ax)
-        self.baseline_survival_.plot(ax=ax, ls="--")
-        return ax
+            self.predict_survival_function(X).plot(ax=axes)
+            self.baseline_survival_.plot(ax=axes, ls="--")
+
+        else:
+            axes = []
+            for stratum, baseline_survival_ in self.baseline_survival_.iteritems():
+                ax = plt.figure().add_subplot(1, 1, 1)
+                x_bar = self._norm_mean.to_frame().T
+
+                for name, value in zip(self.strata, _to_array(stratum)):
+                    x_bar[name] = value
+
+                X = pd.concat([x_bar] * len(values))
+                X.index = ["%s=%s" % (covariate, g) for g in values]
+                X[covariate] = values
+
+                self.predict_survival_function(X).plot(ax=ax)
+                baseline_survival_.plot(ax=ax, ls="--", label="stratum %s baseline survival" % str(stratum))
+                plt.legend()
+                axes.append(ax)
+        return axes
 
     def check_assumptions(
         self, training_df, advice=True, show_plots=True, p_value_threshold=0.05, plot_n_bootstraps=10
diff --git a/lifelines/plotting.py b/lifelines/plotting.py
index 9bd4588de..751d44aec 100644
--- a/lifelines/plotting.py
+++ b/lifelines/plotting.py
@@ -171,11 +171,11 @@ def add_at_risk_counts(*fitters, **kwargs):
 
 
 def plot_lifetimes(
-    duration,
+    durations,
     event_observed=None,
     entry=None,
     left_truncated=False,
-    sort_by_duration=False,
+    sort_by_duration=True,
     event_observed_color="#A60628",
     event_censored_color="#348ABD",
     **kwargs
@@ -185,7 +185,7 @@ def plot_lifetimes(
     
     Parameters
     -----------
-    duration: (n,) numpy array or pd.Series
+    durations: (n,) numpy array or pd.Series
        duration subject was observed for.
     event_observed: (n,) numpy array or pd.Series
       array of booleans: True if event observed, else False.
@@ -209,7 +209,7 @@ def plot_lifetimes(
     set_kwargs_ax(kwargs)
     ax = kwargs.pop("ax")
 
-    N = duration.shape[0]
+    N = durations.shape[0]
     if N > 80:
         warnings.warn("For less visual clutter, you may want to subsample to less than 80 individuals.")
 
@@ -219,20 +219,23 @@ def plot_lifetimes(
     if entry is None:
         entry = np.zeros(N)
 
+    assert durations.shape == (N,)
+    assert event_observed.shape == (N,)
+
     if sort_by_duration:
-        # order by length of lifetimes; probably not very informative.
-        ix = np.argsort(duration, 0)
-        duration = duration[ix]
+        # order by length of lifetimes;
+        ix = np.argsort(entry + durations, 0)
+        durations = durations[ix]
         event_observed = event_observed[ix]
         entry = entry[ix]
 
     for i in range(N):
         c = event_observed_color if event_observed[i] else event_censored_color
-        ax.hlines(N - 1 - i, entry[i], entry[i] + duration[i], color=c, lw=1.5)
+        ax.hlines(N - 1 - i, entry[i], entry[i] + durations[i], color=c, lw=1.5)
         if left_truncated:
             ax.hlines(N - 1 - i, 0, entry[i], color=c, lw=1.0, linestyle="--")
         m = "" if not event_observed[i] else "o"
-        ax.scatter(entry[i] + duration[i], N - 1 - i, color=c, marker=m, s=10)
+        ax.scatter(entry[i] + durations[i], N - 1 - i, color=c, marker=m, s=10)
 
     ax.set_ylim(-0.5, N)
     return ax
diff --git a/lifelines/version.py b/lifelines/version.py
index f9a4af8fd..90daa1f8c 100644
--- a/lifelines/version.py
+++ b/lifelines/version.py
@@ -1,4 +1,4 @@
 # -*- coding: utf-8 -*-
 from __future__ import unicode_literals
 
-__version__ = "0.17.3"
+__version__ = "0.17.4"
diff --git a/tests/test_estimation.py b/tests/test_estimation.py
index 7cc9b34e6..4d0ab6496 100644
--- a/tests/test_estimation.py
+++ b/tests/test_estimation.py
@@ -57,7 +57,7 @@
     load_holly_molly_polly,
     load_regression_dataset,
     load_stanford_heart_transplants,
-    multicenter_aids_cohort_study,
+    load_multicenter_aids_cohort_study,
 )
 from lifelines.generate_datasets import generate_hazard_rates, generate_random_lifetimes
 
@@ -435,7 +435,7 @@ def kaplan_meier(self, lifetimes, observed=None):
         return km.reshape(len(km), 1)
 
     def test_left_truncation_against_Cole_and_Hudgens(self):
-        df = multicenter_aids_cohort_study()
+        df = load_multicenter_aids_cohort_study()
         kmf = KaplanMeierFitter()
         kmf.fit(df["T"], event_observed=df["D"], entry=df["W"])
 
diff --git a/tests/test_plotting.py b/tests/test_plotting.py
index c1d146639..261f9890e 100644
--- a/tests/test_plotting.py
+++ b/tests/test_plotting.py
@@ -17,6 +17,8 @@
     load_lcd,
     load_panel_test,
     load_stanford_heart_transplants,
+    load_rossi,
+    load_multicenter_aids_cohort_study,
 )
 from lifelines.generate_datasets import cumulative_integral
 
@@ -171,6 +173,22 @@ def test_plot_lifetimes_left_truncation(self, block):
         self.plt.title("test_plot_lifetimes_left_truncation")
         self.plt.show(block=block)
 
+    def test_MACS_data_with_plot_lifetimes(self, block):
+        df = load_multicenter_aids_cohort_study()
+
+        plot_lifetimes(
+            df["T"] - df["W"],
+            event_observed=df["D"],
+            entry=df["W"],
+            event_observed_color="#383838",
+            event_censored_color="#383838",
+            left_truncated=True,
+        )
+        self.plt.ylabel("Patient Number")
+        self.plt.xlabel("Years from AIDS diagnosis")
+        self.plt.title("test_MACS_data_with_plot_lifetimes")
+        self.plt.show(block=block)
+
     def test_plot_lifetimes_relative(self, block):
         t = np.linspace(0, 20, 1000)
         hz, coef, covrt = generate_hazard_rates(1, 5, t)
@@ -279,12 +297,28 @@ def test_coxph_plotting_with_subset_of_columns(self, block):
         self.plt.title("test_coxph_plotting_with_subset_of_columns")
         self.plt.show(block=block)
 
+    def test_coxph_plot_covariate_groups(self, block):
+        df = load_rossi()
+        cp = CoxPHFitter()
+        cp.fit(df, "week", "arrest")
+        cp.plot_covariate_groups("age", [10, 50, 80])
+        self.plt.title("test_coxph_plot_covariate_groups")
+        self.plt.show(block=block)
+
+    def test_coxph_plot_covariate_groups_with_strata(self, block):
+        df = load_rossi()
+        cp = CoxPHFitter()
+        cp.fit(df, "week", "arrest", strata=["paro"])
+        cp.plot_covariate_groups("age", [10, 50, 80])
+        self.plt.title("test_coxph_plot_covariate_groups_with_strata")
+        self.plt.show(block=block)
+
     def test_coxtv_plotting_with_subset_of_columns(self, block):
         df = load_stanford_heart_transplants()
         ctv = CoxTimeVaryingFitter()
         ctv.fit(df, id_col="id", event_col="event")
         ctv.plot(columns=["age", "year"])
-        self.plt.title("test_coxtv_plotting_with_subset_of_columns_and_standardized")
+        self.plt.title("test_coxtv_plotting_with_subset_of_columns")
         self.plt.show(block=block)
 
     def test_coxtv_plotting(self, block):