From c476697331f412c08998b10b582ebbfa4b5654fd Mon Sep 17 00:00:00 2001
From: Aron Mirwald <aron.mirwald@sed.ethz.ch>
Date: Tue, 18 Feb 2025 16:29:21 +0900
Subject: [PATCH] adjust b_significant plotting mostly

---
 seismostats/analysis/b_significant.py         |  66 ++++---
 .../analysis/tests/test_b_significant.py      |   6 +-
 seismostats/plots/statistical.py              | 166 ++++++++++--------
 3 files changed, 134 insertions(+), 104 deletions(-)

diff --git a/seismostats/analysis/b_significant.py b/seismostats/analysis/b_significant.py
index 1b98dbb..a8e3bcd 100644
--- a/seismostats/analysis/b_significant.py
+++ b/seismostats/analysis/b_significant.py
@@ -148,6 +148,7 @@ def values_from_partitioning(
     list_mc: list[float] | float,
     delta_m: float,
     method: AValueEstimator | BValueEstimator = ClassicBValueEstimator,
+    list_scaling: list | None = None,
     * args,
     **kwargs,
 ) -> tuple[np.ndarray, np.ndarray, np.ndarray]:
@@ -164,13 +165,15 @@ def values_from_partitioning(
         delta_m:    Discretization of magnitudes.
         method:     Method to estimate the value. This could be an a-value or
                 b-value estimator.
+        list_scaling: List of scaling factors for the a-value estimation. Only
+                used in case the method is an a-value estimator.
         *args:      Additional arguments to the a/b-value estimation method.
         **kwargs:   Additional keyword arguments to the a/b-value estimation
                 method.
 
     Returns:
-        b_values:   Series of b-values, each one is estimated from the
-                magnitudes contained in the corresponding element of
+        values:     Series of b-values (or a-values), each one is estimated
+                from the magnitudes contained in the corresponding element of
                 ``list_magnitudes``.
         std_b:      Standard deviations corresponding to the b-values.
         n_ms:       Number of events used for the b-value estimates.
@@ -208,6 +211,10 @@ def values_from_partitioning(
             raise IndexError(
                 "Length of list_mc must be the same as list_magnitudes.")
         list_mc = np.array(list_mc)
+    if issubclass(method, AValueEstimator):
+        if list_scaling is None:
+            list_scaling = np.ones(n_subsets)
+        list_scaling = np.array(list_scaling)
 
     # start estimation
     estimator = method()
@@ -222,11 +229,17 @@ def values_from_partitioning(
         mags_loop = mags_loop[idx_sorted]
         times_loop = times_loop[idx_sorted]
 
-        estimator.calculate(
-            mags_loop, mc=list_mc[ii], delta_m=delta_m, *args, **kwargs)
         if isinstance(estimator, AValueEstimator):
+            estimator.calculate(
+                mags_loop,
+                mc=list_mc[ii],
+                delta_m=delta_m,
+                scaling_factor=list_scaling[ii],
+                *args, **kwargs)
             values[ii] = estimator.a_value
         elif isinstance(estimator, BValueEstimator):
+            estimator.calculate(
+                mags_loop, mc=list_mc[ii], delta_m=delta_m, *args, **kwargs)
             values[ii] = estimator.b_value
         stds[ii] = estimator.std
         n_ms[ii] = estimator.n
@@ -268,17 +281,17 @@ def cut_constant_idx(
 
 
 def b_significant_1D(
-        mags: np.ndarray,
+        magnitudes: np.ndarray,
         mc: float | np.ndarray,
         delta_m: float,
-        times: np.ndarray[np.timedelta64],
+        times: np.ndarray,
         n_m: int,
         min_num: int = 10,
         method: BValueEstimator | AValueEstimator = ClassicBValueEstimator,
         conservative: bool = True,
         *args,
         ** kwargs,
-) -> tuple[float, float, float]:
+) -> tuple[float, float, float, float]:
     """
     Estimates the significance of variation of b-values (or a-values) along a
     one-dimensional series of events.
@@ -291,11 +304,12 @@ def b_significant_1D(
     (which can be estimated by len(magnitudes) / n_m) should be used.
 
     Args:
-        mags:           Magnitudes of the events. They are assumed to be order
+        magnitudes:     Magnitudes of the events. They are assumed to be order
                     along the dimension of interest (e.g. time or depth)
         mc:             Completeness magnitude. If a single value is provided,
                     it is used for all magnitudes. Otherwise, the individual
-                    completeness of each magnitude can be provided.
+                    completeness of each magnitude can be provided. This will
+                    be used to filter the magnitudes.
         delta_m:        Magnitude bin width.
         times:          Times of the events.
         n_m:            Number of magnitudes in each partition.
@@ -325,32 +339,34 @@ def b_significant_1D(
         autocorrelation for different n_m, use
         :func:`seismostats.plots.plot_b_significant_1D`
     """
-    # sanity checks
-    mags = np.array(mags)
+    # sanity checks and preparation
+    magnitudes = np.array(magnitudes)
     times = np.array(times)
+    if len(magnitudes) != len(times):
+        raise IndexError("Magnitudes and times must have the same length.")
     if isinstance(mc, (float, int)):
-        if min(mags) < mc:
-            raise ValueError("The completeness magnitude is larger than the "
-                             "smallest magnitude.")
-        mc = np.ones(len(mags)) * mc
+        mc = np.ones(len(magnitudes)) * mc
     else:
         mc = np.array(mc)
-        if any(mags < mc):
-            raise ValueError("There are earthquakes below their respective "
-                             "completeness magnitude.")
     if n_m < min_num:
         raise ValueError("n_m cannot be smaller than min_num.")
 
-    if len(mags) / n_m < 3:
-        raise ValueError(
-            "n_m is too large - less than three subsamples are created.")
-    elif len(mags) / n_m < 25:
+    idx = magnitudes >= mc - delta_m / 2
+    magnitudes = magnitudes[idx]
+    times = times[idx]
+    mc = mc[idx]
+
+    if len(magnitudes) / n_m < 3:
+        if get_option("warnings") is True:
+            warnings.warn(
+                "n_m is too large - less than three subsamples are created."
+                "return NaNs")
+        return np.nan, np.nan, np.nan, np.nan
+    elif len(magnitudes) / n_m < 25:
         if get_option("warnings") is True:
             warnings.warn(
                 "The number of subsamples is less than 25. The normality "
                 "assumption of the autocorrelation might not be valid.")
-    if len(mags) != len(times):
-        raise IndexError("Magnitudes and times must have the same length.")
 
     # Estimate a and b values for n_m realizations.
     ac_1D = np.zeros(n_m)
@@ -360,7 +376,7 @@ def b_significant_1D(
     for ii in range(n_m):
         # partition data
         idx, list_magnitudes = cut_constant_idx(
-            mags, n_m, offset=ii
+            magnitudes, n_m, offset=ii
         )
         list_times = np.array_split(times, idx)
         list_mc = np.array_split(mc, idx)
diff --git a/seismostats/analysis/tests/test_b_significant.py b/seismostats/analysis/tests/test_b_significant.py
index 7ee7a0b..0e124fe 100644
--- a/seismostats/analysis/tests/test_b_significant.py
+++ b/seismostats/analysis/tests/test_b_significant.py
@@ -171,7 +171,7 @@ def test_b_significant_1D():
     assert_almost_equal(mu_mac, -0.020387359836901122)
     assert_almost_equal(std_mac, 0.1382577696134609)
 
-    with pytest.raises(ValueError):
+    with pytest.warns(UserWarning):
         # mags larger than mc present
         b_significant_1D(
             mags,
@@ -188,7 +188,7 @@ def test_b_significant_1D():
             times=times,
             n_m=10,
             min_num=11)
-    with pytest.raises(ValueError):
+    with pytest.warns(UserWarning):
         # n_m larger than len(mags)/3
         b_significant_1D(
             mags,
@@ -196,7 +196,7 @@ def test_b_significant_1D():
             delta_m=1,
             times=times,
             n_m=500)
-    with pytest.raises(IndexError):
+    with pytest.raises(ValueError):
         # times and mags have different lengths
         b_significant_1D(
             mags,
diff --git a/seismostats/plots/statistical.py b/seismostats/plots/statistical.py
index eba02c3..70d2fc3 100644
--- a/seismostats/plots/statistical.py
+++ b/seismostats/plots/statistical.py
@@ -22,21 +22,22 @@ def plot_mc_vs_b(
     ax: plt.Axes | None = None,
     color: str = "blue",
     label: str | None = None,
+    *args,
     **kwargs,
 ) -> plt.Axes:
     """Plots the estimated b-value in dependence of the completeness magnitude.
 
     Args:
-        magnitudes: magnitudes of the catalog
-        mcs:        completeness magnitudes (list or numpy array)
-        delta_m:    discretization of the magnitudes
-        method:     method used for b-value estimation
-        confidence_intvl:   confidence interval that should be plotted
-        ax:         axis where figure should be plotted
-        color:      color of the data
-        label:      label of the data that will be put in the legend
-        **kwargs:   Additional keyword arguments for the b-value
-                estimator.
+        magnitudes:     Magnitudes of the catalog.
+        mcs:            Completeness magnitudes.
+        delta_m:        Discretization of the magnitudes.
+        method:         Method used for b-value estimation.
+        confidence_intvl: Confidence interval that should be plotted.
+        ax:             Axis where figure should be plotted.
+        color:          Color of the data.
+        label:          Label of the data that will be put in the legend.
+        *args:          Additional arguments for the b-value estimator.
+        **kwargs:       Additional keyword arguments for the b-value estimator.
 
     Returns:
         ax that was plotted on
@@ -48,7 +49,7 @@ def plot_mc_vs_b(
 
     for mc in mcs:
         estimator.calculate(
-            magnitudes, mc=mc, delta_m=delta_m, **kwargs)
+            magnitudes, mc=mc, delta_m=delta_m, *args, **kwargs)
         b_values.append(estimator.b_value)
         b_errors.append(estimator.std)
 
@@ -78,9 +79,9 @@ def plot_mc_vs_b(
 
 
 def plot_b_series_constant_nm(
-        mags: np.ndarray,
+        magnitudes: np.ndarray,
         delta_m: float,
-        mc: np.ndarray,
+        mc: float | np.ndarray,
         times: np.ndarray,
         n_m: int,
         min_num: float = 2,
@@ -98,69 +99,63 @@ def plot_b_series_constant_nm(
     Plots the b-values estimated from a running window of n_m magnitudes.
 
     Args:
-        mags:   magnitudes of the events. If x_variable is None, the magnitudes
-            are assumed to be sorted in the dimension of interest.
-        delta_m:    magnitude bin width
-        mc:     completeness magnitude. If a single value is provided, it is
-            used for all magnitudes. Otherwise, the individual completeness of
-            each magnitude can be provided.
-        times:  times of the events
-        n_m:   number of magnitudes in each partition
-        min_num:    minimum number of events from which a b-value is estimated.
-            If the number of events is smaller, the b-value is set to np.nan
-        b_method:   method to estimate the b-values
-        plot_technique:    technique where to plot the b-values with respect to
-            the x-variable. Options are 'left', 'midpoint', 'right'. If set to
-            'right' (default), the b-value is plotted at the right edge. For
-            time series, this is the most common choice, as it avoids optical
-            illusions of the b-value predicting future seismicity.
-        x_variable: values of the dimension of interest, along which the
-            b-values should be plotted. It should be a 1D array with the same
-            length as the magnitudes, e.g., the time of the events. If None,
-            the b-values are plotted against the event index.
-        confidence:    confidence interval that should be plotted. Default
-            is 0.95 (i.e., the 95% confidence interval is plotted)
-        ax:     axis where the plot should be plotted
-        color:  color of the data
-        label:  abel of the data that will be put in the legend
-        *args:  Additional positional arguments for the b-value estimator.
+        magnitudes: Magnitudes of the events. If x_variable is None, the
+                magnitudes are assumed to be sorted in the dimension of
+                interest.
+        delta_m:    Magnitude bin width
+        mc:         Completeness magnitude. If a single value is provided, it is
+                used for all magnitudes. Otherwise, the individual completeness
+                of each magnitude can be provided.
+        times:      times of the events
+        n_m:        number of magnitudes in each partition
+        min_num:    Minimum number of events from which a b-value is estimated.
+                If the number of events is smaller, the b-value is set to np.nan
+        b_method:   Method to estimate the b-values
+        plot_technique: Technique where to plot the b-values with respect to
+                the x-variable. Options are 'left', 'midpoint', 'right'. If set
+                to 'right' (default), the b-value is plotted at the right edge.
+                For time series, this is the most common choice, as it avoids
+                optical illusions of the b-value predicting future seismicity.
+        x_variable: Values of the dimension of interest, along which the
+                b-values should be plotted. It should be a 1D array with the
+                same length as the magnitudes, e.g., the time of the events. If
+                None, the b-values are plotted against the event index.
+        confidence: Confidence interval that should be plotted. Default
+                is 0.95 (i.e., the 95% confidence interval is plotted)
+        ax:         axis where the plot should be plotted
+        color:      Color of the data
+        label:      Label of the data that will be put in the legend
+        *args:      Additional positional arguments for the b-value estimator.
         **kwargs:   Additional keyword arguments for the b-value estimator.
 
     Returns:
-        ax that was plotted on
+        ax:         Ax that was plotted on.
     """
-    mags = np.array(mags)
+    # sanity checks and preparation
+    magnitudes = np.array(magnitudes)
     times = np.array(times)
-
     if isinstance(mc, (float, int)):
-        if min(mags) < mc:
-            raise ValueError("The completeness magnitude is larger than the "
-                             "smallest magnitude")
-        mc = np.ones(len(mags)) * mc
+        mc = np.ones(len(magnitudes)) * mc
     else:
-        if any(mags < mc):
-            raise ValueError("There are earthquakes below their respective "
-                             "completeness magnitude")
+        mc = np.array(mc)
     if n_m < min_num:
-        raise ValueError("n_m cannot be smaller than min_num")
-    if len(mags) != len(times):
-        raise IndexError("mags and times must have the same length")
-
+        raise ValueError("n_m cannot be smaller than min_num.")
+    if len(magnitudes) != len(times):
+        raise IndexError("Magnitudes and times must have the same length.")
     if x_variable is None:
-        x_variable = np.arange(len(mags))
-    elif len(x_variable) != len(mags):
+        x_variable = np.arange(len(magnitudes))
+    elif len(x_variable) != len(magnitudes):
         raise IndexError(
-            "x_variable must have the same length as magnitudes")
+            "x_variable must have the same length as magnitudes.")
     else:
+        x_variable = np.array(x_variable)
         idx_sort = np.argsort(x_variable)
-        mags = mags[idx_sort]
+        magnitudes = magnitudes[idx_sort]
         times = times[idx_sort]
         mc = mc[idx_sort]
         x_variable = x_variable[idx_sort]
-
     if ax is None:
         _, ax = plt.subplots()
-
     if plot_technique == 'left':
         idx_start = 0
     elif plot_technique == 'midpoint':
@@ -169,25 +164,33 @@ def plot_b_series_constant_nm(
         idx_start = n_m - 1
     else:
         raise ValueError(
-            "plot_technique must be 'left', 'midpoint', or 'right'")
+            "plot_technique must be 'left', 'midpoint', or 'right'.")
+
+    # filter magnitudes
+    idx = magnitudes >= mc - delta_m / 2
+    magnitudes = magnitudes[idx]
+    times = times[idx]
+    mc = mc[idx]
+    x_variable = x_variable[idx]
 
     # estimation
     estimator = b_method()
-    b_values = np.ones(len(mags)) * np.nan
-    std_bs = np.ones(len(mags)) * np.nan
-    for ii in range(len(mags) - n_m + 1):
+    b_values = np.ones(len(magnitudes)) * np.nan
+    std_bs = np.ones(len(magnitudes)) * np.nan
+    for ii in range(len(magnitudes) - n_m + 1):
         # runnning window of n_m magnitudes
-        mags_window = mags[ii:ii + n_m]
+        mags_window = magnitudes[ii:ii + n_m]
         times_window = times[ii:ii + n_m]
+        mc_window = mc[ii:ii + n_m]
 
-        # sort the magnitudes and times
+        # sort the magnitudes by time
         idx = np.argsort(times_window)
         mags_window = mags_window[idx]
-        times_window = times_window[idx]
+        mc_window = mc_window[idx]
 
         # estimate the b-value
         estimator.calculate(
-            mags_window, mc=mc[ii], delta_m=delta_m, *args, **kwargs)
+            mags_window, mc=max(mc_window), delta_m=delta_m, *args, **kwargs)
         if estimator.n < min_num:
             b_values[idx_start + ii] = np.nan
             std_bs[idx_start + ii] = np.nan
@@ -262,15 +265,16 @@ def plot_b_significant_1D(
         **kwargs:   Additional keyword arguments for the b-value estimator.
 
     """
-    if n_ms is None:
-        n_ms = np.arange(20, len(magnitudes) / 25, 5).astype(int)
+    # sanity checks and preparation
+    magnitudes = np.array(magnitudes)
+    times = np.array(times)
+
     if isinstance(mc, (float, int)):
         mc = np.ones(len(magnitudes)) * mc
-    elif not isinstance(mc, np.ndarray):
-        raise ValueError("mc must be a float, or numpy array.")
     if x_variable is None:
         x_variable = np.arange(len(magnitudes))
     else:
+        x_variable = np.array(x_variable)
         if len(x_variable) != len(magnitudes):
             raise ValueError(
                 "x_variable must have the same length as magnitudes.")
@@ -282,28 +286,38 @@ def plot_b_significant_1D(
     if ax is None:
         _, ax = plt.subplots()
 
-    # filter magnitudes
     idx = magnitudes >= mc - delta_m / 2
     magnitudes = magnitudes[idx]
     times = times[idx]
     mc = mc[idx]
 
+    if n_ms is None:
+        n_ms = np.arange(20, len(magnitudes) / 25, 10).astype(int)
+
     # estimate the MAC for each n_m
+    p = np.zeros(len(n_ms))
     mac = np.zeros(len(n_ms))
     mu_mac = np.zeros(len(n_ms))
     std_mac = np.zeros(len(n_ms))
     for ii, n_m in enumerate(n_ms):
-        mac[ii], mu_mac[ii], std_mac[ii] = b_significant_1D(
+        p[ii], mac[ii], mu_mac[ii], std_mac[ii] = b_significant_1D(
             magnitudes, mc, delta_m, times, n_m, min_num=min_num,
-            b_method=b_method, **kwargs)
+            method=b_method, **kwargs)
 
     # plot the results
     plt.plot(n_ms, mac, color=color, marker='o', label=label)
-    plt.fill_between(n_ms, mu_mac - 1.96 * std_mac, mu_mac + 1.96 * std_mac,
-                     color=adjust_color_brightness(color, 0.7), alpha=0.2,
+    std_factor = norm.ppf(1 - p_threshold / 2)
+    plt.fill_between(n_ms,
+                     mu_mac - std_factor * std_mac,
+                     mu_mac + std_factor * std_mac,
+                     color=adjust_color_brightness(color, 0.7),
+                     alpha=0.2,
                      linewidth=0)
     plt.plot(n_ms, mu_mac, color=adjust_color_brightness(
         color, 1.3), linestyle='--')
+    if any(p < p_threshold):
+        plt.plot(n_ms[p < p_threshold], mac[p < p_threshold],
+                 'o', color='r')
 
     plt.xlabel('$n_m$')
     plt.ylabel('MAC')