From c476697331f412c08998b10b582ebbfa4b5654fd Mon Sep 17 00:00:00 2001 From: Aron Mirwald Date: Tue, 18 Feb 2025 16:29:21 +0900 Subject: [PATCH] adjust b_significant plotting mostly --- seismostats/analysis/b_significant.py | 66 ++++--- .../analysis/tests/test_b_significant.py | 6 +- seismostats/plots/statistical.py | 166 ++++++++++-------- 3 files changed, 134 insertions(+), 104 deletions(-) diff --git a/seismostats/analysis/b_significant.py b/seismostats/analysis/b_significant.py index 1b98dbb..a8e3bcd 100644 --- a/seismostats/analysis/b_significant.py +++ b/seismostats/analysis/b_significant.py @@ -148,6 +148,7 @@ def values_from_partitioning( list_mc: list[float] | float, delta_m: float, method: AValueEstimator | BValueEstimator = ClassicBValueEstimator, + list_scaling: list | None = None, * args, **kwargs, ) -> tuple[np.ndarray, np.ndarray, np.ndarray]: @@ -164,13 +165,15 @@ def values_from_partitioning( delta_m: Discretization of magnitudes. method: Method to estimate the value. This could be an a-value or b-value estimator. + list_scaling: List of scaling factors for the a-value estimation. Only + used in case the method is an a-value estimator. *args: Additional arguments to the a/b-value estimation method. **kwargs: Additional keyword arguments to the a/b-value estimation method. Returns: - b_values: Series of b-values, each one is estimated from the - magnitudes contained in the corresponding element of + values: Series of b-values (or a-values), each one is estimated + from the magnitudes contained in the corresponding element of ``list_magnitudes``. std_b: Standard deviations corresponding to the b-values. n_ms: Number of events used for the b-value estimates. @@ -208,6 +211,10 @@ def values_from_partitioning( raise IndexError( "Length of list_mc must be the same as list_magnitudes.") list_mc = np.array(list_mc) + if issubclass(method, AValueEstimator): + if list_scaling is None: + list_scaling = np.ones(n_subsets) + list_scaling = np.array(list_scaling) # start estimation estimator = method() @@ -222,11 +229,17 @@ def values_from_partitioning( mags_loop = mags_loop[idx_sorted] times_loop = times_loop[idx_sorted] - estimator.calculate( - mags_loop, mc=list_mc[ii], delta_m=delta_m, *args, **kwargs) if isinstance(estimator, AValueEstimator): + estimator.calculate( + mags_loop, + mc=list_mc[ii], + delta_m=delta_m, + scaling_factor=list_scaling[ii], + *args, **kwargs) values[ii] = estimator.a_value elif isinstance(estimator, BValueEstimator): + estimator.calculate( + mags_loop, mc=list_mc[ii], delta_m=delta_m, *args, **kwargs) values[ii] = estimator.b_value stds[ii] = estimator.std n_ms[ii] = estimator.n @@ -268,17 +281,17 @@ def cut_constant_idx( def b_significant_1D( - mags: np.ndarray, + magnitudes: np.ndarray, mc: float | np.ndarray, delta_m: float, - times: np.ndarray[np.timedelta64], + times: np.ndarray, n_m: int, min_num: int = 10, method: BValueEstimator | AValueEstimator = ClassicBValueEstimator, conservative: bool = True, *args, ** kwargs, -) -> tuple[float, float, float]: +) -> tuple[float, float, float, float]: """ Estimates the significance of variation of b-values (or a-values) along a one-dimensional series of events. @@ -291,11 +304,12 @@ def b_significant_1D( (which can be estimated by len(magnitudes) / n_m) should be used. Args: - mags: Magnitudes of the events. They are assumed to be order + magnitudes: Magnitudes of the events. They are assumed to be order along the dimension of interest (e.g. time or depth) mc: Completeness magnitude. If a single value is provided, it is used for all magnitudes. Otherwise, the individual - completeness of each magnitude can be provided. + completeness of each magnitude can be provided. This will + be used to filter the magnitudes. delta_m: Magnitude bin width. times: Times of the events. n_m: Number of magnitudes in each partition. @@ -325,32 +339,34 @@ def b_significant_1D( autocorrelation for different n_m, use :func:`seismostats.plots.plot_b_significant_1D` """ - # sanity checks - mags = np.array(mags) + # sanity checks and preparation + magnitudes = np.array(magnitudes) times = np.array(times) + if len(magnitudes) != len(times): + raise IndexError("Magnitudes and times must have the same length.") if isinstance(mc, (float, int)): - if min(mags) < mc: - raise ValueError("The completeness magnitude is larger than the " - "smallest magnitude.") - mc = np.ones(len(mags)) * mc + mc = np.ones(len(magnitudes)) * mc else: mc = np.array(mc) - if any(mags < mc): - raise ValueError("There are earthquakes below their respective " - "completeness magnitude.") if n_m < min_num: raise ValueError("n_m cannot be smaller than min_num.") - if len(mags) / n_m < 3: - raise ValueError( - "n_m is too large - less than three subsamples are created.") - elif len(mags) / n_m < 25: + idx = magnitudes >= mc - delta_m / 2 + magnitudes = magnitudes[idx] + times = times[idx] + mc = mc[idx] + + if len(magnitudes) / n_m < 3: + if get_option("warnings") is True: + warnings.warn( + "n_m is too large - less than three subsamples are created." + "return NaNs") + return np.nan, np.nan, np.nan, np.nan + elif len(magnitudes) / n_m < 25: if get_option("warnings") is True: warnings.warn( "The number of subsamples is less than 25. The normality " "assumption of the autocorrelation might not be valid.") - if len(mags) != len(times): - raise IndexError("Magnitudes and times must have the same length.") # Estimate a and b values for n_m realizations. ac_1D = np.zeros(n_m) @@ -360,7 +376,7 @@ def b_significant_1D( for ii in range(n_m): # partition data idx, list_magnitudes = cut_constant_idx( - mags, n_m, offset=ii + magnitudes, n_m, offset=ii ) list_times = np.array_split(times, idx) list_mc = np.array_split(mc, idx) diff --git a/seismostats/analysis/tests/test_b_significant.py b/seismostats/analysis/tests/test_b_significant.py index 7ee7a0b..0e124fe 100644 --- a/seismostats/analysis/tests/test_b_significant.py +++ b/seismostats/analysis/tests/test_b_significant.py @@ -171,7 +171,7 @@ def test_b_significant_1D(): assert_almost_equal(mu_mac, -0.020387359836901122) assert_almost_equal(std_mac, 0.1382577696134609) - with pytest.raises(ValueError): + with pytest.warns(UserWarning): # mags larger than mc present b_significant_1D( mags, @@ -188,7 +188,7 @@ def test_b_significant_1D(): times=times, n_m=10, min_num=11) - with pytest.raises(ValueError): + with pytest.warns(UserWarning): # n_m larger than len(mags)/3 b_significant_1D( mags, @@ -196,7 +196,7 @@ def test_b_significant_1D(): delta_m=1, times=times, n_m=500) - with pytest.raises(IndexError): + with pytest.raises(ValueError): # times and mags have different lengths b_significant_1D( mags, diff --git a/seismostats/plots/statistical.py b/seismostats/plots/statistical.py index eba02c3..70d2fc3 100644 --- a/seismostats/plots/statistical.py +++ b/seismostats/plots/statistical.py @@ -22,21 +22,22 @@ def plot_mc_vs_b( ax: plt.Axes | None = None, color: str = "blue", label: str | None = None, + *args, **kwargs, ) -> plt.Axes: """Plots the estimated b-value in dependence of the completeness magnitude. Args: - magnitudes: magnitudes of the catalog - mcs: completeness magnitudes (list or numpy array) - delta_m: discretization of the magnitudes - method: method used for b-value estimation - confidence_intvl: confidence interval that should be plotted - ax: axis where figure should be plotted - color: color of the data - label: label of the data that will be put in the legend - **kwargs: Additional keyword arguments for the b-value - estimator. + magnitudes: Magnitudes of the catalog. + mcs: Completeness magnitudes. + delta_m: Discretization of the magnitudes. + method: Method used for b-value estimation. + confidence_intvl: Confidence interval that should be plotted. + ax: Axis where figure should be plotted. + color: Color of the data. + label: Label of the data that will be put in the legend. + *args: Additional arguments for the b-value estimator. + **kwargs: Additional keyword arguments for the b-value estimator. Returns: ax that was plotted on @@ -48,7 +49,7 @@ def plot_mc_vs_b( for mc in mcs: estimator.calculate( - magnitudes, mc=mc, delta_m=delta_m, **kwargs) + magnitudes, mc=mc, delta_m=delta_m, *args, **kwargs) b_values.append(estimator.b_value) b_errors.append(estimator.std) @@ -78,9 +79,9 @@ def plot_mc_vs_b( def plot_b_series_constant_nm( - mags: np.ndarray, + magnitudes: np.ndarray, delta_m: float, - mc: np.ndarray, + mc: float | np.ndarray, times: np.ndarray, n_m: int, min_num: float = 2, @@ -98,69 +99,63 @@ def plot_b_series_constant_nm( Plots the b-values estimated from a running window of n_m magnitudes. Args: - mags: magnitudes of the events. If x_variable is None, the magnitudes - are assumed to be sorted in the dimension of interest. - delta_m: magnitude bin width - mc: completeness magnitude. If a single value is provided, it is - used for all magnitudes. Otherwise, the individual completeness of - each magnitude can be provided. - times: times of the events - n_m: number of magnitudes in each partition - min_num: minimum number of events from which a b-value is estimated. - If the number of events is smaller, the b-value is set to np.nan - b_method: method to estimate the b-values - plot_technique: technique where to plot the b-values with respect to - the x-variable. Options are 'left', 'midpoint', 'right'. If set to - 'right' (default), the b-value is plotted at the right edge. For - time series, this is the most common choice, as it avoids optical - illusions of the b-value predicting future seismicity. - x_variable: values of the dimension of interest, along which the - b-values should be plotted. It should be a 1D array with the same - length as the magnitudes, e.g., the time of the events. If None, - the b-values are plotted against the event index. - confidence: confidence interval that should be plotted. Default - is 0.95 (i.e., the 95% confidence interval is plotted) - ax: axis where the plot should be plotted - color: color of the data - label: abel of the data that will be put in the legend - *args: Additional positional arguments for the b-value estimator. + magnitudes: Magnitudes of the events. If x_variable is None, the + magnitudes are assumed to be sorted in the dimension of + interest. + delta_m: Magnitude bin width + mc: Completeness magnitude. If a single value is provided, it is + used for all magnitudes. Otherwise, the individual completeness + of each magnitude can be provided. + times: times of the events + n_m: number of magnitudes in each partition + min_num: Minimum number of events from which a b-value is estimated. + If the number of events is smaller, the b-value is set to np.nan + b_method: Method to estimate the b-values + plot_technique: Technique where to plot the b-values with respect to + the x-variable. Options are 'left', 'midpoint', 'right'. If set + to 'right' (default), the b-value is plotted at the right edge. + For time series, this is the most common choice, as it avoids + optical illusions of the b-value predicting future seismicity. + x_variable: Values of the dimension of interest, along which the + b-values should be plotted. It should be a 1D array with the + same length as the magnitudes, e.g., the time of the events. If + None, the b-values are plotted against the event index. + confidence: Confidence interval that should be plotted. Default + is 0.95 (i.e., the 95% confidence interval is plotted) + ax: axis where the plot should be plotted + color: Color of the data + label: Label of the data that will be put in the legend + *args: Additional positional arguments for the b-value estimator. **kwargs: Additional keyword arguments for the b-value estimator. Returns: - ax that was plotted on + ax: Ax that was plotted on. """ - mags = np.array(mags) + # sanity checks and preparation + magnitudes = np.array(magnitudes) times = np.array(times) - if isinstance(mc, (float, int)): - if min(mags) < mc: - raise ValueError("The completeness magnitude is larger than the " - "smallest magnitude") - mc = np.ones(len(mags)) * mc + mc = np.ones(len(magnitudes)) * mc else: - if any(mags < mc): - raise ValueError("There are earthquakes below their respective " - "completeness magnitude") + mc = np.array(mc) if n_m < min_num: - raise ValueError("n_m cannot be smaller than min_num") - if len(mags) != len(times): - raise IndexError("mags and times must have the same length") - + raise ValueError("n_m cannot be smaller than min_num.") + if len(magnitudes) != len(times): + raise IndexError("Magnitudes and times must have the same length.") if x_variable is None: - x_variable = np.arange(len(mags)) - elif len(x_variable) != len(mags): + x_variable = np.arange(len(magnitudes)) + elif len(x_variable) != len(magnitudes): raise IndexError( - "x_variable must have the same length as magnitudes") + "x_variable must have the same length as magnitudes.") else: + x_variable = np.array(x_variable) idx_sort = np.argsort(x_variable) - mags = mags[idx_sort] + magnitudes = magnitudes[idx_sort] times = times[idx_sort] mc = mc[idx_sort] x_variable = x_variable[idx_sort] - if ax is None: _, ax = plt.subplots() - if plot_technique == 'left': idx_start = 0 elif plot_technique == 'midpoint': @@ -169,25 +164,33 @@ def plot_b_series_constant_nm( idx_start = n_m - 1 else: raise ValueError( - "plot_technique must be 'left', 'midpoint', or 'right'") + "plot_technique must be 'left', 'midpoint', or 'right'.") + + # filter magnitudes + idx = magnitudes >= mc - delta_m / 2 + magnitudes = magnitudes[idx] + times = times[idx] + mc = mc[idx] + x_variable = x_variable[idx] # estimation estimator = b_method() - b_values = np.ones(len(mags)) * np.nan - std_bs = np.ones(len(mags)) * np.nan - for ii in range(len(mags) - n_m + 1): + b_values = np.ones(len(magnitudes)) * np.nan + std_bs = np.ones(len(magnitudes)) * np.nan + for ii in range(len(magnitudes) - n_m + 1): # runnning window of n_m magnitudes - mags_window = mags[ii:ii + n_m] + mags_window = magnitudes[ii:ii + n_m] times_window = times[ii:ii + n_m] + mc_window = mc[ii:ii + n_m] - # sort the magnitudes and times + # sort the magnitudes by time idx = np.argsort(times_window) mags_window = mags_window[idx] - times_window = times_window[idx] + mc_window = mc_window[idx] # estimate the b-value estimator.calculate( - mags_window, mc=mc[ii], delta_m=delta_m, *args, **kwargs) + mags_window, mc=max(mc_window), delta_m=delta_m, *args, **kwargs) if estimator.n < min_num: b_values[idx_start + ii] = np.nan std_bs[idx_start + ii] = np.nan @@ -262,15 +265,16 @@ def plot_b_significant_1D( **kwargs: Additional keyword arguments for the b-value estimator. """ - if n_ms is None: - n_ms = np.arange(20, len(magnitudes) / 25, 5).astype(int) + # sanity checks and preparation + magnitudes = np.array(magnitudes) + times = np.array(times) + if isinstance(mc, (float, int)): mc = np.ones(len(magnitudes)) * mc - elif not isinstance(mc, np.ndarray): - raise ValueError("mc must be a float, or numpy array.") if x_variable is None: x_variable = np.arange(len(magnitudes)) else: + x_variable = np.array(x_variable) if len(x_variable) != len(magnitudes): raise ValueError( "x_variable must have the same length as magnitudes.") @@ -282,28 +286,38 @@ def plot_b_significant_1D( if ax is None: _, ax = plt.subplots() - # filter magnitudes idx = magnitudes >= mc - delta_m / 2 magnitudes = magnitudes[idx] times = times[idx] mc = mc[idx] + if n_ms is None: + n_ms = np.arange(20, len(magnitudes) / 25, 10).astype(int) + # estimate the MAC for each n_m + p = np.zeros(len(n_ms)) mac = np.zeros(len(n_ms)) mu_mac = np.zeros(len(n_ms)) std_mac = np.zeros(len(n_ms)) for ii, n_m in enumerate(n_ms): - mac[ii], mu_mac[ii], std_mac[ii] = b_significant_1D( + p[ii], mac[ii], mu_mac[ii], std_mac[ii] = b_significant_1D( magnitudes, mc, delta_m, times, n_m, min_num=min_num, - b_method=b_method, **kwargs) + method=b_method, **kwargs) # plot the results plt.plot(n_ms, mac, color=color, marker='o', label=label) - plt.fill_between(n_ms, mu_mac - 1.96 * std_mac, mu_mac + 1.96 * std_mac, - color=adjust_color_brightness(color, 0.7), alpha=0.2, + std_factor = norm.ppf(1 - p_threshold / 2) + plt.fill_between(n_ms, + mu_mac - std_factor * std_mac, + mu_mac + std_factor * std_mac, + color=adjust_color_brightness(color, 0.7), + alpha=0.2, linewidth=0) plt.plot(n_ms, mu_mac, color=adjust_color_brightness( color, 1.3), linestyle='--') + if any(p < p_threshold): + plt.plot(n_ms[p < p_threshold], mac[p < p_threshold], + 'o', color='r') plt.xlabel('$n_m$') plt.ylabel('MAC')