From a0814efcb247470cfcf46a0d8c6d8fa2b2739939 Mon Sep 17 00:00:00 2001 From: Tim-Gunnar Hensel Date: Mon, 8 Feb 2021 12:12:18 +0100 Subject: [PATCH] - S3 Interface for MCS (#95) - markdown for documentation (#173) - closes #189 --- NAMESPACE | 7 + NEWS.md | 41 +- R/delay_distributions.R | 822 +----------------- R/mcs_data.R | 8 +- R/mcs_delay.R | 779 +++++++++++++++++ R/mcs_mileage.R | 241 +++++ R/mileage_distribution.R | 413 ++++----- R/utils.R | 20 + _pkgdown.yml | 4 + docs/articles/Life_Data_Analysis_Part_I.html | 6 +- docs/articles/Life_Data_Analysis_Part_II.html | 6 +- .../articles/Life_Data_Analysis_Part_III.html | 4 +- docs/news/index.html | 85 +- docs/pkgdown.yml | 2 +- docs/reference/dist_delay.default.html | 260 ++++++ docs/reference/dist_delay.html | 125 +-- docs/reference/dist_delay_register.html | 10 +- docs/reference/dist_delay_report.html | 10 +- docs/reference/dist_mileage.default.html | 238 +++++ docs/reference/dist_mileage.html | 84 +- docs/reference/estimate_cdf.default.html | 17 +- docs/reference/estimate_cdf.html | 22 +- docs/reference/field_data.html | 235 +++++ docs/reference/index.html | 55 ++ docs/reference/mcs_delay.default.html | 352 ++++++++ docs/reference/mcs_delay.html | 262 ++---- docs/reference/mcs_delay_data.html | 323 +++++++ docs/reference/mcs_delay_register.html | 27 +- docs/reference/mcs_delay_report.html | 31 +- docs/reference/mcs_delays.html | 45 +- docs/reference/mcs_mileage.default.html | 276 ++++++ docs/reference/mcs_mileage.html | 157 +--- docs/reference/mcs_mileage_data.html | 274 ++++++ docs/reference/mixmod_regression.html | 3 +- docs/reference/rank_regression.default.html | 18 +- docs/reference/rank_regression.html | 27 +- docs/reference/reliability_data.html | 9 +- docs/reference/weibulltools-package.html | 2 +- man/dist_delay.Rd | 14 +- man/dist_delay.default.Rd | 10 +- man/dist_delay_register.Rd | 6 +- man/dist_delay_report.Rd | 6 +- man/dist_mileage.Rd | 86 +- man/dist_mileage.default.Rd | 63 ++ man/mcs_delay.Rd | 
251 ++---- man/mcs_delay.default.Rd | 170 ++++ man/mcs_delay_data.Rd | 6 +- man/mcs_delay_register.Rd | 27 +- man/mcs_delay_report.Rd | 27 +- man/mcs_delays.Rd | 6 +- man/mcs_mileage.Rd | 153 +--- man/mcs_mileage.default.Rd | 98 +++ man/mcs_mileage_data.Rd | 4 +- man/rank_regression.Rd | 4 +- tests/testthat/_snaps/delay_distributions.md | 2 +- tests/testthat/test-delay_distributions.R | 8 +- tests/testthat/test-mileage_distribution.R | 30 +- 57 files changed, 4285 insertions(+), 1986 deletions(-) create mode 100644 R/mcs_delay.R create mode 100644 R/mcs_mileage.R create mode 100644 docs/reference/dist_delay.default.html create mode 100644 docs/reference/dist_mileage.default.html create mode 100644 docs/reference/field_data.html create mode 100644 docs/reference/mcs_delay.default.html create mode 100644 docs/reference/mcs_delay_data.html create mode 100644 docs/reference/mcs_mileage.default.html create mode 100644 docs/reference/mcs_mileage_data.html create mode 100644 man/dist_mileage.default.Rd create mode 100644 man/mcs_delay.default.Rd create mode 100644 man/mcs_mileage.default.Rd diff --git a/NAMESPACE b/NAMESPACE index 730b0c9..8a52fcc 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -9,8 +9,14 @@ S3method(confint_fisher,wt_ml_estimation) S3method(confint_fisher,wt_model) S3method(dist_delay,default) S3method(dist_delay,wt_mcs_delay_data) +S3method(dist_mileage,default) +S3method(dist_mileage,wt_mcs_mileage_data) S3method(estimate_cdf,default) S3method(estimate_cdf,wt_reliability_data) +S3method(mcs_delay,default) +S3method(mcs_delay,wt_mcs_delay_data) +S3method(mcs_mileage,default) +S3method(mcs_mileage,wt_mcs_mileage_data) S3method(mixmod_em,default) S3method(mixmod_em,wt_reliability_data) S3method(mixmod_regression,default) @@ -50,6 +56,7 @@ S3method(print,wt_delay_estimation_list) S3method(print,wt_em_results) S3method(print,wt_mcs_delay_data) S3method(print,wt_mcs_mileage_data) +S3method(print,wt_mileage_estimation) S3method(print,wt_mixmod_em) 
S3method(print,wt_mixmod_regression) S3method(print,wt_mixmod_regression_list) diff --git a/NEWS.md b/NEWS.md index 813676c..50793fa 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,28 +1,53 @@ # weibulltools (development version) ## Breaking Changes -* `confint_betabinom()` and `confint_fisher()`: Removed constant features `distribution`, `bounds` and `direction` from the tibble output and added them as attributes instead. + +### Parametric Models * `plot_prob.wt_model()`: Removed dysfunctional argument `distribution`. The distribution is inferred using the model `x`. + +### Confidence Intervals +* `confint_betabinom()` and `confint_fisher()`: Removed constant features `distribution`, `bounds` and `direction` from the tibble output and added them as attributes instead. + +### Monte Carlo Simulation * `mcs_mileage()`: Changed name of output column `mileage` to `x` (in accordance with `reliability_data()`). * `mcs_delay()`: Changed name of output column `time` to `x` (in accordance with `reliability_data()`). +* `dist_mileage.default()` (formerly `dist_mileage()`): Renamed argument `mileage` to `x`. +* `mcs_mileage.default()` (formerly `mcs_mileage()`): Renamed argument `mileage` to `x`. ## New Features + +### Confidence Intervals +* `confint_betabinom()`: Methods `"kaplan"` and `"nelson"` of `estimate_cdf()` can be used for beta-binomial confidence bounds. + +### Monte Carlo Simulation +* Added `mcs_mileage_data()`: Create consistent MCS data for `mcs_mileage()`. +* Added `mcs_delay_data()`: Create consistent MCS data for `mcs_delay()`. +* `dist_mileage()` is now an S3 generic. `dist_mileage()` becomes `dist_mileage.default()`. Added `dist_mileage.wt_mcs_mileage_data()`. * `dist_delay()` is now an S3 generic. `dist_delay()` becomes `dist_delay.default()`. Added `dist_delay.wt_mcs_delay_data()`. -* Added `print.wt_delay_estimation()` for one delay and and `print.wt_delay_estimation_list()` for multiple delays. 
* `dist_delay()` now supports the estimation of multiple delay distributions at once. +* `mcs_mileage()` is now an S3 generic. `mcs_mileage()` becomes `mcs_mileage.default()`. Added `mcs_mileage.wt_mcs_mileage_data()`. +* `mcs_delay()` is now an S3 generic. `mcs_delay()` becomes `mcs_delay.default()`. Added `mcs_delay.wt_mcs_delay_data()`. * Added `print.wt_mcs_delay_data()` and `print.wt_mcs_mileage_data()`. -* Added `mcs_delay_data()`: Create consistent MCS data for `mcs_delay()`. -* Added `mcs_mileage_data()`: Create consistent MCS data for `mcs_mileage()`. -* `confint_betabinom()`: Methods `"kaplan"` and `"nelson"` of `estimate_cdf()` can be used for beta-binomial confidence bounds. +* Added `print.wt_mileage_estimation()`. +* Added `print.wt_delay_estimation()` for one delay and `print.wt_delay_estimation_list()` for multiple delays. ## Minor Improvements and bug fixes -* The object returned by `dist_delay()` now has class `wt_delay_estimation` or `wt_delay_estimation_list`. -* Fixed bug in `plot_conf()`: Wrong confidence bounds were displayed for `direction = "x"` (#181). -* Fixed bug in `plot_conf()`: `plot_method = "ggplot2"` and exactly one method in `estimate_cdf()` resulted in an error (#182). + +### Reliability Data * Fixed bug in `reliability_data()`: Using `!!` syntax with arguments `x` and `status` resulted in an error. * `estimate_cdf()` preserves additional columns, that were returned from `reliability_data(..., .keep_all = TRUE)`. * Improved `print.wt_reliability_data()`. +### Confidence Intervals +* Fixed bug in `plot_conf()`: Wrong confidence bounds were displayed for `direction = "x"` (#181). +* Fixed bug in `plot_conf()`: `plot_method = "ggplot2"` and exactly one method in `estimate_cdf()` resulted in an error (#182). + +### Monte Carlo Simulation +* The object returned by `mcs_mileage()` now has class `wt_mcs_mileage`. +* The object returned by `mcs_delay()` now has class `wt_mcs_delay`. 
+* The object returned by `dist_mileage()` now has class `wt_mileage_estimation`. +* The object returned by `dist_delay()` now has class `wt_delay_estimation` or `wt_delay_estimation_list`. + ## Documentation improvements * `plot_prob()`: Better work out the distinction between `plot_prob.wt_cdf_estimation()` and `plot_prob.wt_model()`. The former is applied to a CDF estimation whereas the latter is applied to a mixture model. diff --git a/R/delay_distributions.R b/R/delay_distributions.R index 4554b1c..b66f11a 100644 --- a/R/delay_distributions.R +++ b/R/delay_distributions.R @@ -1,8 +1,6 @@ #' Parameter Estimation of a Delay Distribution #' #' @description -#' `r lifecycle::badge("experimental")` -#' #' This function models a delay (in days) random variable (e.g. in logistic, #' registration, report) using a supposed continuous distribution. First, the #' row-wise differences in days of the related date columns are calculated and then @@ -15,10 +13,10 @@ #' Time differences less than or equal to zero are not considered as well. #' #' @param x A `tibble` of class `wt_mcs_delay_data` returned by [mcs_delay_data]. -#' @param distribution Supposed distribution of the random variable. +#' @param distribution Supposed distribution of the respective delay. #' @template dots #' -#' @return A list of class `wt_delay_estimation` which contains: +#' @return A list with class `wt_delay_estimation` which contains: #' #' * `coefficients` : A named vector of estimated parameter(s). #' * `delay` : A numeric vector of element-wise computed differences in days. @@ -87,7 +85,7 @@ #' @md #' #' @export -dist_delay <- function(x, distribution, ...) { +dist_delay <- function(...) { UseMethod("dist_delay") } @@ -96,12 +94,15 @@ dist_delay <- function(x, distribution, ...) { #' @rdname dist_delay #' #' @export -dist_delay.wt_mcs_delay_data <- function(x, - distribution = c("lognormal", "exponential"), - ... 
- +dist_delay.wt_mcs_delay_data <- function( + ..., + x, + distribution = c("lognormal", "exponential") ) { + # Check that '...' argument is not used: + check_dots(...) + # Extract 'mcs_start_dates' and 'mcs_end_dates' columns as list: date_1_names <- attr(x, "mcs_start_dates") date_2_names <- attr(x, "mcs_end_dates") @@ -147,7 +148,7 @@ dist_delay.wt_mcs_delay_data <- function(x, #' the later date(s) of the particular delay(s). Use `NA` for missing elements. #' @inheritParams dist_delay #' -#' @return A list of class `wt_delay_estimation` which contains: +#' @return A list with class `wt_delay_estimation` which contains: #' #' * `coefficients` : A named vector of estimated parameter(s). #' * `delay` : A numeric vector of element-wise computed differences in days. @@ -191,12 +192,15 @@ dist_delay.wt_mcs_delay_data <- function(x, #' @md #' #' @export -dist_delay.default <- function(date_1, +dist_delay.default <- function(..., + date_1, date_2, - distribution = c("lognormal", "exponential"), - ... + distribution = c("lognormal", "exponential") ) { + # Check that '...' argument is not used: + check_dots(...) + # Convert date_1 and date_2 to lists if they are vectors: if (!is.list(date_1)) date_1 <- list(date_1) if (!is.list(date_2)) date_2 <- list(date_2) @@ -252,6 +256,7 @@ dist_delay.default <- function(date_1, +# Helper function that performs the estimation of a parametric delay distribution: dist_delay_ <- function(date_1, date_2, distribution = c("lognormal", "exponential") @@ -269,17 +274,27 @@ dist_delay_ <- function(date_1, # Checks: ## all NA: if (all(is.na(t_delay))) { - stop("All differences are NA. No parameters can be estimated!") + stop( + "All date differences are 'NA'. No parameters can be estimated!", + call. = FALSE + ) } ## any or all delays are smaller or equal to zero: if (any(t_delay <= 0, na.rm = TRUE)) { if (all(t_delay <= 0, na.rm = TRUE)) { ### all: - stop("All differences are smaller or equal to 0. 
No parameters can be estimated!") + stop( + "All date differences are smaller or equal to 0. ", + "No parameters can be estimated!", + call. = FALSE + ) } else { ### any: - warning("At least one of the time differences is smaller or equal to 0 and is", - " ignored for the estimation step!") + warning( + "At least one of the date differences is smaller or equal to 0 and is ", + "ignored for the estimation step!", + call. = FALSE + ) t_delay <- t_delay[t_delay > 0] } } @@ -346,353 +361,13 @@ print.wt_delay_estimation_list <- function(x, -#' Adjustment of Operating Times by Delays using a Monte Carlo Approach -#' -#' @description -#' In general, the amount of available information about units in the field is very -#' different. During the warranty period, there are only a few cases with complete -#' data (mainly \emph{failed units}) but lots of cases with incomplete data (usually -#' \emph{censored units}). As a result, the operating time of units with incomplete -#' information is often inaccurate and must be adjusted by delays. -#' -#' This function reduces the operating times of incomplete observations by simulated -#' delays (in days). A unit is considered as incomplete if the later of the two -#' dates is unknown, i.e. \code{date_2 = NA}. See 'Details' for some practical examples. -#' -#' Random delay numbers are drawn from the distribution determined by complete cases -#' (described in 'Details' of \code{\link{dist_delay}}). -#' -#' @details -#' In field data analysis time-dependent characteristics (e.g. \emph{time in service}) -#' are often imprecisely recorded. These inaccuracies are caused by unconsidered delays. -#' -#' For a better understanding of the MCS application in the context of field data, -#' two cases are described below. 
-#' \itemize{ -#' \item \strong{Delay in registration}: It is common that a supplier, which provides -#' parts to the manufacturing industry does not know when the unit, in which -#' its parts are installed, were put in service (due to unknown \code{date_2}, -#' i.e. registration or sales date). Without taking the described delay into -#' account, the time in service of the failed units would be the difference -#' between the repair date and \code{date_1} (i.e. the production date) and for -#' intact units the difference between the present date and \code{date_1}. But -#' the real operating times are (much) shorter, since the stress on the -#' components have not started until the whole systems were put in service. -#' Hence, units with incomplete data (missing \code{date_2}) must be reduced by -#' the delays. -#' \item \strong{Delay in report}: Authorized repairers often do not immediately -#' notify the manufacturer or OEM of repairs that were made during the warranty -#' period, but instead pass the information about these repairs in collected -#' forms e.g. weekly, monthly or quarterly. The resulting time difference between -#' the reporting (\code{date_2}) of the repair in the guarantee database and the -#' actual repair date (\code{date_1}), which is often assumed to be the failure -#' date, is called the reporting delay. For a given date where the analysis -#' is made there could be units which had a failure but are not registered -#' and therefore treated as censored units. In order to take this case into -#' account and according to the principle of equal opportunities, the lifetime -#' of units with no report date (\code{date_2 = NA}) is reduced by simulated -#' reporting delays. -#' } -#' -#' @param date_1 A vector of class \code{character} or \code{Date}, in the -#' format "yyyy-mm-dd", indicating the earlier of the two dates. Use \code{NA} -#' for missing elements. 
-#' -#' If more than one delay should be considered it must be a list where the first -#' element contains the earlier dates of the first delay and the second element -#' contains the earlier dates of the second delay, and so forth.(See 'Examples'). -#' @param date_2 A vector of class \code{character} or \code{Date}, in the -#' format "yyyy-mm-dd", indicating the later of the two dates. Use \code{NA} -#' for missing elements. -#' -#' If more than one delay should be considered it must be a list where the first -#' element contains the later dates of the first delay and the second element -#' contains the later dates of the second delay, and so forth. (See 'Examples'). -#' @param time A numeric vector of operating times. Use \code{NA} for missing elements. -#' @param status Optional argument. If used it has to be a vector of binary -#' data (0 or 1) indicating whether unit \emph{i} is a right censored observation -#' (= 0) or a failure (= 1). The effect of \code{status} on the return is described -#' in 'Value'. -#' @param id A vector for the identification of every unit. -#' @param distribution Supposed distribution of the delay random variable. If more -#' than one delay is to be considered and different distributions are assumed -#' for each delay, the argument \code{distribution} must have the same length -#' as list \code{date_1} (and \code{date_2}). For example, in the case of -#' two delays with different distributions, one has to specify the argument as -#' \code{distribution = c("lognormal", "exponential")}. Then the lognormal -#' distribution is applied to the first delay and the exponential distribution -#' to the second (See 'Examples'). -#' -#' @return A list containing the following elements: -#' \itemize{ -#' \item \code{data} : A tibble with classes \code{wt_mcs_data} and -#' \code{wt_reliability_data} if \code{status} is provided. 
Since the -#' class \code{wt_reliability_data} enables the direct usage of -#' \code{data} inside -#' \code{\link[=estimate_cdf]{estimate_cdf.wt_reliability_data}}, the -#' required lifetime characteristic is automatically set to the operating -#' time \code{time}. -#' -#' If \code{status = NULL} class is \code{wt_mcs_data}, which is not -#' supported by \code{estimate_cdf} due to missing \code{status}. -#' -#' The tibble contains the following columns: -#' \itemize{ -#' \item \code{date_1} : Earlier dates. If argument \code{date_1} is a list -#' of length \emph{i, i > 1} (described in \strong{Arguments}) multiple -#' columns with names \code{date_1.1}, \code{date_1.2}, ..., \code{date_1.i} -#' and the corresponding values of the earlier dates are used. -#' \item \code{date_2} : Later dates. In the case of a list with length greater -#' than 1, the routine described above is used. -#' \item \code{x} : Adjusted operating times for incomplete observations -#' and input operating times for the complete observations. -#' \item \code{status} (\strong{optional}) : -#' \itemize{ -#' \item If argument \code{status = NULL} column \code{status} does -#' not exist. -#' \item If argument \code{status} is provided the column contains -#' the entered binary data (0 or 1). -#' } -#' \item \code{id} : Identification of every unit. -#' } -#' \item \code{sim_data} : A tibble with column \code{sim_delay} that holds the -#' simulated delay-specific numbers for incomplete cases and \code{0} for -#' complete cases. If more than one delay was considered multiple columns -#' \code{sim_delay.1}, \code{sim_delay.2}, ..., \code{sim_delay.i} with -#' corresponding delay-specific random numbers are presented. -#' \item \code{model_estimation} : A list containing a named list -#' (\code{"delay_distribution"}) with output of \code{\link{dist_delay}}. For -#' multiple delays the list contains as many lists as there are delays, i.e. 
-#' (\code{"delay_distribution.1"}, \code{"delay_distribution.2"}, ..., -#' \code{"delay_distribution.i"}). -#' } -#' -#' @references Verband der Automobilindustrie e.V. (VDA); Qualitätsmanagement in -#' der Automobilindustrie. Zuverlässigkeitssicherung bei Automobilherstellern -#' und Lieferanten. Zuverlässigkeits-Methoden und -Hilfsmittel.; 4th Edition, 2016, -#' -#' -#' @seealso \code{\link{estimate_cdf}} -#' -#' @examples -#' # Data for examples: -#' date_of_production <- c("2014-07-28", "2014-02-17", "2014-07-14", -#' "2014-06-26", "2014-03-10", "2014-05-14", -#' "2014-05-06", "2014-03-07", "2014-03-09", -#' "2014-04-13", "2014-05-20", "2014-07-07", -#' "2014-01-27", "2014-01-30", "2014-03-17", -#' "2014-02-09", "2014-04-14", "2014-04-20", -#' "2014-03-13", "2014-02-23", "2014-04-03", -#' "2014-01-08", "2014-01-08") -#' -#' date_of_registration <- c(NA, "2014-03-29", "2014-12-06", "2014-09-09", -#' NA, NA, "2014-06-16", NA, "2014-05-23", -#' "2014-05-09", "2014-05-31", NA, "2014-04-13", -#' NA, NA, "2014-03-12", NA, "2014-06-02", -#' NA, "2014-03-21", "2014-06-19", NA, NA) -#' -#' date_of_repair <- c(NA, "2014-09-15", "2015-07-04", "2015-04-10", NA, -#' NA, "2015-04-24", NA, "2015-04-25", "2015-04-24", -#' "2015-06-12", NA, "2015-05-04", NA, NA, -#' "2015-05-22", NA, "2015-09-17", NA, "2015-08-15", -#' "2015-11-26", NA, NA) -#' -#' date_of_report <- c(NA, "2014-10-09", "2015-08-28", "2015-04-15", NA, -#' NA, "2015-05-16", NA, "2015-05-28", "2015-05-15", -#' "2015-07-11", NA, "2015-08-14", NA, NA, -#' "2015-06-05", NA, "2015-10-17", NA, "2015-08-21", -#' "2015-12-02", NA, NA) -#' -#' time_in_service <- rep(1000, length(date_of_production)) -#' status <- c(0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 0, 1, 0, 0, 1, 0, 1, 0, 1, 1, 0, 0) -#' -#' # Example 1 - MCS for delay in registration: -#' mcs_regist <- mcs_delay( -#' date_1 = date_of_production, -#' date_2 = date_of_registration, -#' time = time_in_service, -#' status = status, -#' distribution = "lognormal" -#' ) -#' 
-#' # Example 2 - MCS for delay in report: -#' mcs_report <- mcs_delay( -#' date_1 = date_of_repair, -#' date_2 = date_of_report, -#' time = time_in_service, -#' status = status, -#' distribution = "exponential" -#' ) -#' -#' # Example 3 - Reproducibility of random numbers: -#' set.seed(1234) -#' mcs_report_reproduce <- mcs_delay( -#' date_1 = date_of_repair, -#' date_2 = date_of_report, -#' time = time_in_service, -#' status = status, -#' distribution = "exponential" -#' ) -#' -#' # Example 4 - MCS for delays in registration and report with same distribution: -#' mcs_delays <- mcs_delay( -#' date_1 = list(date_of_production, date_of_repair), -#' date_2 = list(date_of_registration, date_of_report), -#' time = time_in_service, -#' status = status, -#' distribution = "lognormal" -#' ) -#' -#' # Example 5 - MCS for delays in registration and report with different distributions: -#' ## Assuming lognormal registration and exponential reporting delays. -#' mcs_delays_2 <- mcs_delay( -#' date_1 = list(date_of_production, date_of_repair), -#' date_2 = list(date_of_registration, date_of_report), -#' time = time_in_service, -#' status = status, -#' distribution = c("lognormal", "exponential") -#' ) -#' -#' @export -mcs_delay <- function(date_1, - date_2, - time, - status = NULL, - id = paste0("ID", seq_len(length(time))), - distribution = c("lognormal", "exponential") -) { - - # Checks: - ## Check for (multiple) distributions: - if (missing(distribution)) { - distribution <- "lognormal" - } - distribution <- match.arg(distribution, several.ok = TRUE) - - ## Convert date_1 and date_2 to lists if they are vectors: - if (!is.list(date_1)) date_1 <- list(date_1) - if (!is.list(date_2)) date_2 <- list(date_2) - - ## Check for different length in date_1 and date_2: - purrr::walk2(date_1, date_2, function(e1, e2) { - if (length(e1) != length(e2)) { - stop("Elements of 'date_1' and 'date_2' differ in length!") - } - }) - - # Step 1: Parameter estimation using complete cases: - 
par_list <- purrr::pmap( - list( - date_1, - date_2, - distribution - ), - dist_delay_ - ) - - # Step 2: Simulation of random numbers: - sim_list <- purrr::map2( - date_2, - par_list, - mcs_helper - ) - - ## Adjustment of operating times: - time <- time - purrr::reduce(sim_list, `+`) - - # Prepare data_list which has to be converted to a tibble: - if (purrr::is_null(status)) { - data_list <- c(date_1, date_2, list(time, id)) - } else { - # check for status: - if (!is_status(status)) { - stop("'status' must be numeric with elements 0 or 1!") - } - data_list <- c(date_1, date_2, list(time, status, id)) - } - - # Defining and setting names for output elements: - ## lengths of lists sim_list, par_list, date_1, date_2 remains the same: - if (length(sim_list) > 1) { - sim_list_names <- paste0("sim_delay.", seq_along(sim_list)) - par_list_names <- paste0("delay_distribution.", seq_along(sim_list)) - data_list_names <- c( - paste0("date_1.", seq_along(date_1)), - paste0("date_2.", seq_along(date_2)) - ) - } else { - sim_list_names <- "sim_delay" - par_list_names <- "delay_distribution" - data_list_names <- c("date_1", "date_2") - } - - names(sim_list) <- sim_list_names - names(par_list) <- par_list_names - - if (purrr::is_null(status)) { - names(data_list) <- c(data_list_names, "x", "id") - class_assign <- "wt_mcs_data" - } else { - names(data_list) <- c(data_list_names, "x", "status", "id") - class_assign <- c("wt_mcs_data", "wt_reliability_data") - } - - # Defining data_tbl with class "wt_mcs_data" and/or "wt_reliability_data": - data_tbl <- tibble::as_tibble(data_list) - class(data_tbl) <- c(class_assign, class(data_tbl)) - - if (!purrr::is_null(status)) { - attr(data_tbl, "characteristic") <- "time" - } - - mcs_output <- list( - data = data_tbl, - sim_data = tibble::as_tibble(sim_list), - model_estimation = par_list - ) - - return(mcs_output) -} - - - -# helper function to generate MCS random numbers: -mcs_helper <- function(x, par_list) { - - # adjustment can only be 
done for units that have a x entry of NA! Otherwise - # data would be complete and no simulation is needed. - replacable <- is.na(x) - - # generate random numbers: - if (par_list$distribution == "lognormal") { - x_sim <- stats::rlnorm( - length(x), - par_list$coefficients[1], - par_list$coefficients[2] - ) - } - - if (par_list$distribution == "exponential") { - x_sim <- stats::rexp( - length(x), - 1 / par_list$coefficients[1] - ) - } - - x_sim[!replacable] <- 0 - - return(x_sim) -} - - - #' Parameter Estimation of the Delay in Registration Distribution #' #' @description #' `r lifecycle::badge("soft-deprecated")` #' -#' \code{dist_delay_register()} is no longer under active development, switching -#' to \code{\link{dist_delay}} is recommended. +#' `dist_delay_register()` is no longer under active development, switching to +#' [dist_delay] is recommended. #' #' @details #' This function introduces a delay random variable by calculating the time @@ -700,14 +375,14 @@ mcs_helper <- function(x, par_list) { #' and afterwards estimates the parameter(s) of a supposed distribution, #' using maximum likelihood. #' -#' @param date_prod A vector of class \code{character} or \code{Date}, in the +#' @param date_prod A vector of class `character` or `Date`, in the #' format "yyyy-mm-dd", indicating the date of production of a unit. -#' Use \code{NA} for missing elements. -#' @param date_register A vector of class \code{character} or \code{Date}, in +#' Use `NA` for missing elements. +#' @param date_register A vector of class `character` or `Date`, in #' the format "yyyy-mm-dd", indicating the date of registration of a unit. -#' Use \code{NA} for missing elements. +#' Use `NA` for missing elements. #' @param distribution Supposed distribution of the random variable. Only -#' \code{"lognormal"} is implemented. +#' `"lognormal"` is implemented. #' #' @return A named vector of estimated parameters for the specified #' distribution. 
@@ -782,142 +457,13 @@ dist_delay_register <- function(date_prod, -#' Adjustment of Operating Times by Delays in Registration using a Monte Carlo -#' Approach -#' -#' @description -#' `r lifecycle::badge("soft-deprecated")` -#' -#' \code{mcs_delay_register()} is no longer under active development, switching -#' to \code{\link{mcs_delay}} is recommended. -#' -#' @details -#' In general the amount of information about units in the field, that have not -#' failed yet, are rare. For example it is common that a supplier, who provides -#' parts to the automotive industry does not know when a vehicle was put in -#' service and therefore does not know the exact operating time of the supplied -#' parts. This function uses a Monte Carlo approach for simulating the operating -#' times of (multiple) right censored observations, taking account of registering -#' delays. The simulation is based on the distribution of operating times that were -#' calculated from complete data (see \code{\link{dist_delay_register}}). -#' -#' @inheritParams dist_delay_register -#' @param time A numeric vector of operating times. -#' @param status A vector of binary data (0 or 1) indicating whether unit \emph{i} -#' is a right censored observation (= 0) or a failure (= 1). -#' @param distribution Supposed distribution of the random variable. Only -#' \code{"lognormal"} is implemented. -#' @param details A logical. If \code{FALSE} the output consists of a vector with -#' corrected operating times for the censored units and the input operating -#' times for the failed units. If \code{TRUE} the output consists of a detailed -#' list, i.e the same vector as described before, simulated random numbers and -#' estimated distribution parameters. -#' -#' @return A numeric vector of corrected operating times for the censored units -#' and the input operating times for the failed units if -#' \code{details = FALSE}. 
If \code{details = TRUE} the output is a list which -#' consists of the following elements: -#' \itemize{ -#' \item \code{time} : Numeric vector of corrected operating times for the -#' censored observations and input operating times for failed units. -#' \item \code{x_sim} : Simulated random numbers of specified distribution with -#' estimated parameters. The length of \code{x_sim} is equal to the number of -#' censored observations. -#' \item \code{coefficients} : Estimated coefficients of supposed -#' distribution. -#' } -#' -#' @examples -#' date_of_production <- c("2014-07-28", "2014-02-17", "2014-07-14", -#' "2014-06-26", "2014-03-10", "2014-05-14", -#' "2014-05-06", "2014-03-07", "2014-03-09", -#' "2014-04-13", "2014-05-20", "2014-07-07", -#' "2014-01-27", "2014-01-30", "2014-03-17", -#' "2014-02-09", "2014-04-14", "2014-04-20", -#' "2014-03-13", "2014-02-23", "2014-04-03", -#' "2014-01-08", "2014-01-08") -#' date_of_registration <- c(NA, "2014-03-29", "2014-12-06", "2014-09-09", -#' NA, NA, "2014-06-16", NA, "2014-05-23", -#' "2014-05-09", "2014-05-31", NA, "2014-04-13", -#' NA, NA, "2014-03-12", NA, "2014-06-02", -#' NA, "2014-03-21", "2014-06-19", NA, NA) -#' -#' op_time <- rep(1000, length(date_of_production)) -#' status <- c(0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 0, 1, 0, 0, 1, 0, 1, 0, 1, 1, 0, 0) -#' -#' # Example 1 - Simplified vector output: -#' x_corrected <- mcs_delay_register( -#' date_prod = date_of_production, -#' date_register = date_of_registration, -#' time = op_time, -#' status = status, -#' distribution = "lognormal", -#' details = FALSE -#' ) -#' -#' # Example 2 - Detailed list output: -#' list_detail <- mcs_delay_register( -#' date_prod = date_of_production, -#' date_register = date_of_registration, -#' time = op_time, -#' status = status, -#' distribution = "lognormal", -#' details = TRUE -#' ) -#' -#' @md -#' -#' @export -mcs_delay_register <- function(date_prod, - date_register, - time, - status, - distribution = "lognormal", - details = 
FALSE -) { - deprecate_soft("2.0.0", "mcs_delay_register()", "mcs_delay()") - - # Number of Monte Carlo simulated random numbers, i.e. number of censored data. - n_rand <- sum(is.na(date_register)) - if (any(!stats::complete.cases(date_prod) | !stats::complete.cases(date_register))) { - prod_date <- date_prod[(stats::complete.cases(date_prod) & - stats::complete.cases(date_register))] - register_date <- date_register[(stats::complete.cases(date_prod) & - stats::complete.cases(date_register))] - } else { - prod_date <- date_prod - register_date <- date_register - } - - if (distribution == "lognormal") { - params <- dist_delay_register(date_prod = prod_date, - date_register = register_date, - distribution = "lognormal") - - x_sim <- stats::rlnorm(n = n_rand, meanlog = params[[1]], sdlog = params[[2]]) - } else { - stop("No valid distribution!") - } - - time[is.na(date_register)] <- time[is.na(date_register)] - x_sim - - if (details == FALSE) { - output <- time - } else { - output <- list(time = time, x_sim = x_sim, coefficients = params) - } - return(output) -} - - - #' Parameter Estimation of the Delay in Report Distribution #' #' @description #' `r lifecycle::badge("soft-deprecated")` #' -#' \code{dist_delay_report()} is no longer under active development, switching -#' to \code{\link{dist_delay}} is recommended. +#' `dist_delay_report()` is no longer under active development, switching to +#' [dist_delay] is recommended. #' #' @details #' This function introduces a delay random variable by calculating the time @@ -927,12 +473,12 @@ mcs_delay_register <- function(date_prod, #' #' @inheritParams dist_delay_register #' -#' @param date_repair a vector of class \code{character} or \code{Date}, in the +#' @param date_repair a vector of class `character` or `Date`, in the #' format "yyyy-mm-dd", indicating the date of repair of a failed unit. -#' Use \code{NA} for missing elements.
-#' @param date_report a vector of class \code{character} or \code{Date}, in the +#' Use `NA` for missing elements. +#' @param date_report a vector of class `character` or `Date`, in the #' format "yyyy-mm-dd", indicating the date of report of a failed unit. -#' Use \code{NA} for missing elements. +#' Use `NA` for missing elements. #' #' @return A named vector of estimated parameters for the specified #' distribution. @@ -1002,281 +548,3 @@ dist_delay_report <- function(date_repair, return(estimates) } - - - -#' Adjustment of Operating Times by Delays in Report using a Monte Carlo Approach -#' -#' @description -#' `r lifecycle::badge("soft-deprecated")` -#' -#' \code{mcs_delay_report()} is no longer under active development, switching -#' to \code{\link{mcs_delay}} is recommended. -#' -#' @details -#' The delay in report describes the time between the occurrence of a damage and -#' the registration in the warranty database. For a given date where the analysis -#' is made there could be units which had a failure but are not registered in the -#' database and therefore treated as censored units. To overcome this problem -#' this function uses a Monte Carlo approach for simulating the operating -#' times of (multiple) right censored observations, taking account of reporting -#' delays. The simulation is based on the distribution of operating times that were -#' calculated from complete data, i.e. failed items (see \code{\link{dist_delay_report}}). -#' -#' @inheritParams dist_delay_report -#' @inheritParams mcs_delay_register -#' -#' @return A numeric vector of corrected operating times for the censored units -#' and the input operating times for the failed units if -#' \code{details = FALSE}. If \code{details = TRUE} the output is a list which -#' consists of the following elements: -#' \itemize{ -#' \item \code{time} : Numeric vector of corrected operating times for the -#' censored observations and input operating times for failed units. 
-#' \item \code{x_sim} : Simulated random numbers of specified distribution with -#' estimated parameters. The length of \code{x_sim} is equal to the number of -#' censored observations. -#' \item \code{coefficients} : Estimated coefficients of supposed -#' distribution. -#' } -#' -#' @examples -#' date_of_repair <- c(NA, "2014-09-15", "2015-07-04", "2015-04-10", NA, -#' NA, "2015-04-24", NA, "2015-04-25", "2015-04-24", -#' "2015-06-12", NA, "2015-05-04", NA, NA, -#' "2015-05-22", NA, "2015-09-17", NA, "2015-08-15", -#' "2015-11-26", NA, NA) -#' -#' date_of_report <- c(NA, "2014-10-09", "2015-08-28", "2015-04-15", NA, -#' NA, "2015-05-16", NA, "2015-05-28", "2015-05-15", -#' "2015-07-11", NA, "2015-08-14", NA, NA, -#' "2015-06-05", NA, "2015-10-17", NA, "2015-08-21", -#' "2015-12-02", NA, NA) -#' -#' op_time <- rep(1000, length(date_of_repair)) -#' status <- c(0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 0, 1, 0, 0, 1, 0, 1, 0, 1, 1, 0, 0) -#' -#' # Example 1 - Simplified vector output: -#' x_corrected <- mcs_delay_report( -#' date_repair = date_of_repair, -#' date_report = date_of_report, -#' time = op_time, -#' status = status, -#' distribution = "lognormal", -#' details = FALSE -#' ) -#' -#' # Example 2 - Detailed list output: -#' list_detail <- mcs_delay_report( -#' date_repair = date_of_repair, -#' date_report = date_of_report, -#' time = op_time, -#' status = status, -#' distribution = "lognormal", -#' details = TRUE -#' ) -#' -#' @md -#' -#' @export -mcs_delay_report <- function(date_repair, - date_report, - time, - status, - distribution = "lognormal", - details = FALSE -) { - deprecate_soft("2.0.0", "mcs_delay_report()", "mcs_delay()") - - # Number of Monte Carlo simulated random numbers, i.e. number of censored data. 
- n_rand <- sum(status == 0) - - if (any(!stats::complete.cases(date_repair) | !stats::complete.cases(date_report))) { - repair_date <- date_repair[(stats::complete.cases(date_repair) & - stats::complete.cases(date_report))] - report_date <- date_report[(stats::complete.cases(date_repair) & - stats::complete.cases(date_report))] - } else { - repair_date <- date_repair - report_date <- date_report - } - - if (distribution == "lognormal") { - params <- dist_delay_report(date_repair = repair_date, - date_report = report_date, - distribution = "lognormal") - - x_sim <- stats::rlnorm(n = n_rand, meanlog = params[[1]], sdlog = params[[2]]) - } else { - stop("No valid distribution!") - } - - time[status == 0] <- time[status == 0] - x_sim - - if (details == FALSE) { - output <- time - } else { - output <- list(time = time, x_sim = x_sim, coefficients = params) - } - return(output) -} - - - -#' Adjustment of Operating Times by Delays using a Monte Carlo Approach -#' -#' @description -#' `r lifecycle::badge("soft-deprecated")` -#' -#' `mcs_delays()` is no longer under active development, switching to [mcs_delay] -#' is recommended. -#' -#' @details -#' This function is a wrapper that combines both, [mcs_delay_register] and -#' [mcs_delay_report] functions for the adjustment of operating times of censored units. -#' -#' @inheritParams mcs_delay_register -#' @inheritParams dist_delay_report -#' -#' @return A numerical vector of corrected operating times for the censored units -#' and the input operating times for the failed units if -#' `details = FALSE`. If `details = TRUE` the output is a list which -#' consists of the following elements: -#' -#' * `time` : A numeric vector of corrected operating times for the censored -#' observations and input operating times for failed units. -#' * `x_sim_regist` : Simulated random numbers of specified distribution with -#' estimated parameters for delay in registration. 
The length of `x_sim_regist` -#' is equal to the number of censored observations. -#' * `x_sim_report` : Simulated random numbers of specified distribution with -#' estimated parameters for delay in report. The length of `x_sim_report` is -#' equal to the number of censored observations. -#' * `coefficients_regist` : Estimated coefficients of supposed distribution for -#' delay in registration. -#' * `coefficients_report` : Estimated coefficients of supposed distribution for -#' delay in report -#' -#' @examples -#' date_of_production <- c("2014-07-28", "2014-02-17", "2014-07-14", -#' "2014-06-26", "2014-03-10", "2014-05-14", -#' "2014-05-06", "2014-03-07", "2014-03-09", -#' "2014-04-13", "2014-05-20", "2014-07-07", -#' "2014-01-27", "2014-01-30", "2014-03-17", -#' "2014-02-09", "2014-04-14", "2014-04-20", -#' "2014-03-13", "2014-02-23", "2014-04-03", -#' "2014-01-08", "2014-01-08") -#' date_of_registration <- c("2014-08-17", "2014-03-29", "2014-12-06", -#' "2014-09-09", "2014-05-14", "2014-07-01", -#' "2014-06-16", "2014-04-03", "2014-05-23", -#' "2014-05-09", "2014-05-31", "2014-08-12", -#' "2014-04-13", "2014-02-15", "2014-07-07", -#' "2014-03-12", "2014-05-27", "2014-06-02", -#' "2014-05-20", "2014-03-21", "2014-06-19", -#' "2014-02-12", "2014-03-27") -#' date_of_repair <- c(NA, "2014-09-15", "2015-07-04", "2015-04-10", NA, -#' NA, "2015-04-24", NA, "2015-04-25", "2015-04-24", -#' "2015-06-12", NA, "2015-05-04", NA, NA, -#' "2015-05-22", NA, "2015-09-17", NA, "2015-08-15", -#' "2015-11-26", NA, NA) -#' -#' date_of_report <- c(NA, "2014-10-09", "2015-08-28", "2015-04-15", NA, -#' NA, "2015-05-16", NA, "2015-05-28", "2015-05-15", -#' "2015-07-11", NA, "2015-08-14", NA, NA, -#' "2015-06-05", NA, "2015-10-17", NA, "2015-08-21", -#' "2015-12-02", NA, NA) -#' -#' op_time <- rep(1000, length(date_of_repair)) -#' status <- c(0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 0, 1, 0, 0, 1, 0, 1, 0, 1, 1, 0, 0) -#' -#' # Example 1 - Simplified vector output: -#' x_corrected <- 
mcs_delays( -#' date_prod = date_of_production, -#' date_register = date_of_registration, -#' date_repair = date_of_repair, -#' date_report = date_of_report, -#' time = op_time, -#' status = status, -#' distribution = "lognormal", -#' details = FALSE -#' ) -#' -#' # Example 2 - Detailed list output: -#' list_detail <- mcs_delays( -#' date_prod = date_of_production, -#' date_register = date_of_registration, -#' date_repair = date_of_repair, -#' date_report = date_of_report, -#' time = op_time, -#' status = status, -#' distribution = "lognormal", -#' details = TRUE -#' ) -#' -#' @md -#' -#' @export -mcs_delays <- function(date_prod, - date_register, - date_repair, - date_report, - time, - status, - distribution = "lognormal", - details = FALSE -) { - deprecate_soft("2.0.0", "mcs_delays()", "mcs_delay()") - - # Number of Monte Carlo simulated random numbers, i.e. number of censored data. - - n_rand_regist <- sum(is.na(date_register)) - n_rand_report <- sum(status == 0) - - if (any(!stats::complete.cases(date_prod) | !stats::complete.cases(date_register))) { - prod_date <- date_prod[(stats::complete.cases(date_prod) & - stats::complete.cases(date_register))] - register_date <- date_register[(stats::complete.cases(date_prod) & - stats::complete.cases(date_register))] - } else { - prod_date <- date_prod - register_date <- date_register - } - - if (any(!stats::complete.cases(date_repair) | !stats::complete.cases(date_report))) { - repair_date <- date_repair[(stats::complete.cases(date_repair) & - stats::complete.cases(date_report))] - report_date <- date_report[(stats::complete.cases(date_repair) & - stats::complete.cases(date_report))] - } else { - repair_date <- date_repair - report_date <- date_report - } - - if (distribution == "lognormal") { - params_regist <- dist_delay_register(date_prod = prod_date, - date_register = register_date, - distribution = "lognormal") - params_report <- dist_delay_report(date_repair = repair_date, - date_report = report_date, - 
distribution = "lognormal") - - x_sim_regist <- stats::rlnorm(n = n_rand_regist, meanlog = params_regist[[1]], - sdlog = params_regist[[2]]) - x_sim_report <- stats::rlnorm(n = n_rand_report, meanlog = params_report[[1]], - sdlog = params_report[[2]]) - } else { - stop("No valid distribution!") - } - - time[is.na(date_register)] <- time[is.na(date_register)] - x_sim_regist - time[status == 0] <- time[status == 0] - x_sim_report - - if (details == FALSE) { - output <- time - } else { - output <- list(time = time, x_sim_regist = x_sim_regist, - x_sim_report = x_sim_report, - coefficients_regist = params_regist, - coefficients_report = params_report) - } - return(output) -} diff --git a/R/mcs_data.R b/R/mcs_data.R index 25f7562..2cddb74 100644 --- a/R/mcs_data.R +++ b/R/mcs_data.R @@ -41,8 +41,8 @@ #' (`date_2.1`, `date_2.2`, `...`, `date_2.i`). #' * `time` : Input operating times. #' * `status` (**optional**) : -#' * If argument `status = NULL` column `status` does not exist. -#' * If argument `status` is provided the column contains the entered binary +#' * If `is.null(status)` column `status` does not exist. +#' * If `status` is provided the column contains the entered binary #' data (0 or 1). #' * `id` : Identification for every unit. #' @@ -300,8 +300,8 @@ mcs_delay_data <- function(data = NULL, #' * `mileage` : Input mileages. #' * `time` : Input operating times. #' * `status` (**optional**) : -#' * If argument `status = NULL` column `status` does not exist. -#' * If argument `status` is provided the column contains the entered binary +#' * If `is.null(status)` column `status` does not exist. +#' * If `status` is provided the column contains the entered binary #' data (0 or 1). #' * `id` : Identification for every unit. 
#' diff --git a/R/mcs_delay.R b/R/mcs_delay.R new file mode 100644 index 0000000..0c460a4 --- /dev/null +++ b/R/mcs_delay.R @@ -0,0 +1,779 @@ +#' Adjustment of Operating Times by Delays using a Monte Carlo Approach +#' +#' @description +#' +#' In general, the amount of available information about units in the field is very +#' different. During the warranty period, there are only a few cases with complete +#' data (mainly *failed units*) but lots of cases with incomplete data (usually +#' *censored units*). As a result, the operating time of units with incomplete +#' information is often inaccurate and must be adjusted by delays. +#' +#' This function reduces the operating times of incomplete observations by simulated +#' delays (in days). A unit is considered as incomplete if the later of the +#' related dates is unknown. See 'Details' for some practical examples. +#' +#' Random delay numbers are drawn from the distribution determined by complete cases +#' (described in 'Details' of [dist_delay]). +#' +#' @details +#' In field data analysis time-dependent characteristics (e.g. *time in service*) +#' are often imprecisely recorded. These inaccuracies are caused by unconsidered delays. +#' +#' For a better understanding of the MCS application in the context of field data, +#' two cases are described below. +#' +#' * **Delay in registration**: It is common that a supplier, which provides +#' parts to the manufacturing industry does not know when the unit, in which +#' its parts are installed, were put in service (due to unknown registration or +#' sales date (`date_2`)). Without taking the described delay into account, the +#' time in service of the failed units would be the difference between the +#' repair date and the production date (`date_1`) and for intact units the +#' difference between the present date and the production date. 
But the real +#' operating times are (much) shorter, since the stress on the components has +#' not started until the whole systems were put in service. Hence, units with +#' incomplete data (missing `date_2`) must be reduced by the delays. +#' * **Delay in report**: Authorized repairers often do not immediately +#' notify the manufacturer or OEM of repairs that were made during the warranty +#' period, but instead pass the information about these repairs in collected +#' forms e.g. weekly, monthly or quarterly. The resulting time difference between +#' the reporting (`date_2`) of the repair in the guarantee database and the +#' actual repair date (`date_1`), which is often assumed to be the failure +#' date, is called the reporting delay. For a given date where the analysis +#' is made there could be units which had a failure but the failure isn't +#' reported and therefore they are treated as censored units. In order to take +#' this into account and according to the principle of equal opportunities, the +#' lifetime of units with missing report date (`date_2[i] = NA`) is reduced by +#' simulated reporting delays. +#' +#' @inheritParams dist_delay +#' +#' @return A list with class `wt_mcs_delay` containing the following elements: +#' +#' * `data` : A `tibble` returned by [mcs_delay_data] where two modifications +#' have been made: +#' +#' * If the column `status` exists, the `tibble` has additional classes +#' `wt_mcs_data` and `wt_reliability_data`. Otherwise, the `tibble` only has +#' the additional class `wt_mcs_data` (which is not supported by [estimate_cdf]). +#' * The column `time` is renamed to `x` (to be in accordance with +#' [reliability_data]) and contains the adjusted operating times for incomplete +#' observations and input operating times for the complete observations. +#' +#' * `sim_data` : A `tibble` with column `sim_delay` that holds the simulated +#' delay-specific numbers for incomplete cases and `0` for complete cases.
+#' If more than one delay was considered multiple columns with names `sim_delay_1`, +#' `sim_delay_2`, ..., `sim_delay_i` and corresponding delay-specific random +#' numbers are presented. +#' * `model_estimation` : A list returned by [dist_delay]. +#' +#' @references Verband der Automobilindustrie e.V. (VDA); Qualitätsmanagement in +#' der Automobilindustrie. Zuverlässigkeitssicherung bei Automobilherstellern +#' und Lieferanten. Zuverlässigkeits-Methoden und -Hilfsmittel.; 4th Edition, 2016, +#' ISSN:0943-9412 +#' +#' @seealso [dist_delay] for the determination of a parametric delay distribution +#' and [estimate_cdf] for the estimation of failure probabilities. +#' +#' @examples +#' # MCS data preparation: +#' ## Data for delay in registration: +#' mcs_tbl_1 <- mcs_delay_data( +#' field_data, +#' date_1 = production_date, +#' date_2 = registration_date, +#' time = dis, +#' status = status, +#' id = vin +#' ) +#' +#' ## Data for delay in report: +#' mcs_tbl_2 <- mcs_delay_data( +#' field_data, +#' date_1 = repair_date, +#' date_2 = report_date, +#' time = dis, +#' status = status, +#' id = vin +#' ) +#' +#' ## Data for both delays: +#' mcs_tbl_both <- mcs_delay_data( +#' field_data, +#' date_1 = c(production_date, repair_date), +#' date_2 = c(registration_date, report_date), +#' time = dis, +#' status = status, +#' id = vin +#' ) +#' +#' # Example 1 - MCS for delay in registration: +#' mcs_regist <- mcs_delay( +#' x = mcs_tbl_1, +#' distribution = "lognormal" +#' ) +#' +#' # Example 2 - MCS for delay in report: +#' mcs_report <- mcs_delay( +#' x = mcs_tbl_2, +#' distribution = "exponential" +#' ) +#' +#' # Example 3 - Reproducibility of random numbers: +#' set.seed(1234) +#' mcs_report_reproduce <- mcs_delay( +#' x = mcs_tbl_2, +#' distribution = "exponential" +#' ) +#' +#' # Example 4 - MCS for delays in registration and report with same distribution: +#' mcs_delays <- mcs_delay( +#' x = mcs_tbl_both, +#' distribution = "lognormal" +#' ) +#' +#' # Example 5 - 
MCS for delays in registration and report with different distributions: +#' ## Assuming lognormal registration and exponential reporting delays. +#' mcs_delays_2 <- mcs_delay( +#' x = mcs_tbl_both, +#' distribution = c("lognormal", "exponential") +#' ) +#' +#' @md +#' +#' @export +mcs_delay <- function(...) { + UseMethod("mcs_delay") +} + + + +#' @rdname mcs_delay +#' +#' @export +mcs_delay.wt_mcs_delay_data <- function( + ..., + x, + distribution = c("lognormal", "exponential") +) { + + # Check that '...' argument is not used: + check_dots(...) + + # Extract 'mcs_start_dates', 'mcs_end_dates' and 'time 'columns as list: + date_1_names <- attr(x, "mcs_start_dates") + date_2_names <- attr(x, "mcs_end_dates") + + date_1 <- dplyr::select(x, {{date_1_names}}) + date_2 <- dplyr::select(x, {{date_2_names}}) + time <- x$time + + + mcs_delay_( + data = x, + date_1 = date_1, + date_2 = date_2, + time = time, + distribution = distribution + ) +} + + + +#' Adjustment of Operating Times by Delays using a Monte Carlo Approach +#' +#' @inherit mcs_delay description details return references seealso +#' +#' @inheritParams dist_delay.default +#' @inheritParams mcs_delay_data +#' +#' @examples +#' # Example 1 - MCS for delay in registration: +#' mcs_regist <- mcs_delay( +#' date_1 = field_data$production_date, +#' date_2 = field_data$registration_date, +#' time = field_data$dis, +#' status = field_data$status, +#' distribution = "lognormal" +#' ) +#' +#' # Example 2 - MCS for delay in report: +#' mcs_report <- mcs_delay( +#' date_1 = field_data$repair_date, +#' date_2 = field_data$report_date, +#' time = field_data$dis, +#' status = field_data$status, +#' distribution = "exponential" +#' ) +#' +#' # Example 3 - Reproducibility of random numbers: +#' set.seed(1234) +#' mcs_report_reproduce <- mcs_delay( +#' date_1 = field_data$repair_date, +#' date_2 = field_data$report_date, +#' time = field_data$dis, +#' status = field_data$status, +#' distribution = "exponential" +#' ) +#' +#' # 
Example 4 - MCS for delays in registration and report with same distribution: +#' mcs_delays <- mcs_delay( +#' date_1 = list(field_data$production_date, field_data$repair_date), +#' date_2 = list(field_data$registration_date, field_data$report_date), +#' time = field_data$dis, +#' status = field_data$status, +#' distribution = "lognormal" +#' ) +#' +#' # Example 5 - MCS for delays in registration and report with different distributions: +#' ## Assuming lognormal registration and exponential reporting delays. +#' mcs_delays_2 <- mcs_delay( +#' date_1 = list(field_data$production_date, field_data$repair_date), +#' date_2 = list(field_data$registration_date, field_data$report_date), +#' time = field_data$dis, +#' status = field_data$status, +#' distribution = c("lognormal", "exponential") +#' ) +#' +#' @md +#' +#' @export +mcs_delay.default <- function(..., + date_1, + date_2, + time, + status = NULL, + id = paste0("ID", seq_len(length(time))), + distribution = c("lognormal", "exponential") +) { + + # Checks: + ## Check that '...' argument is not used: + check_dots(...) 
+ + ## Convert date_1 and date_2 to lists if they are vectors: + if (!is.list(date_1)) date_1 <- list(date_1) + if (!is.list(date_2)) date_2 <- list(date_2) + + mcs_delay_( + date_1 = date_1, + date_2 = date_2, + time = time, + status = status, + id = id, + distribution = distribution + ) +} + + + +# Helper function that performs MCS for delays: +mcs_delay_ <- function(data = NULL, + date_1, # list + date_2, # list + time, # vector + status = NULL, + id = NULL, + distribution +) { + + # Step 1: Parameter estimation using complete cases: + ## Several checks (distributional and length) are made in dist_delay.default: + par_list <- dist_delay.default( + date_1 = date_1, + date_2 = date_2, + distribution = distribution + ) + + ## New check is needed since dist_delay.default is used in favour of dist_delay_: + if (!inherits(par_list, "wt_delay_estimation_list")) { + par_list <- list(par_list) + } + + # Step 2: Simulation of random numbers: + sim_list <- purrr::map2( + date_2, + par_list, # list with class + mcs_helper + ) + + ## Adjustment of operating times: + times <- time - purrr::reduce(sim_list, `+`) + + # Step 3: Create output: + ## Create MCS_Delay_Data and renaming 'time' to 'x': + ## vector-based: + if (purrr::is_null(data)) { + data_tbl <- mcs_delay_data( + date_1 = date_1, + date_2 = date_2, + time = times, + status = status, + id = id + ) + } else { + ## data-based: only 'time' must be updated! 
+ data_tbl <- dplyr::mutate(data, time = times) + } + + data_tbl <- dplyr::rename(data_tbl, x = time) + + ## Set class and attribute w.r.t status; remove class "wt_mcs_delay_data": + if ("status" %in% names(data_tbl)) { + class(data_tbl) <- c("wt_reliability_data", "wt_mcs_data", class(data_tbl)[-1]) + attr(data_tbl, "characteristic") <- "time" + + } else { + class(data_tbl) <- c("wt_mcs_data", class(data_tbl)[-1]) + } + + # Remove attribute "mcs_start_dates" and "mcs_end_dates": + attr(data_tbl, "mcs_start_dates") <- NULL + attr(data_tbl, "mcs_end_dates") <- NULL + + ## Set names of sim_list w.r.t number of considered delays: + if (length(sim_list) > 1) { + names(sim_list) <- paste0("sim_delay_", seq_along(sim_list)) + } else { + names(sim_list) <- "sim_delay" + } + + mcs_output <- list( + data = data_tbl, + sim_data = tibble::as_tibble(sim_list), + model_estimation = par_list + ) + + class(mcs_output) <- c("wt_mcs_delay", class(mcs_output)) + + mcs_output +} + + + +# Helper function to generate MCS random numbers: +mcs_helper <- function(x, par_list) { + # adjustment can only be done for units that have a x entry of NA! Otherwise + # data would be complete and no simulation is needed. + replacable <- is.na(x) + + # generate random numbers: + if (par_list$distribution == "lognormal") { + x_sim <- stats::rlnorm( + length(x), + par_list$coefficients[1], + par_list$coefficients[2] + ) + } + + if (par_list$distribution == "exponential") { + x_sim <- stats::rexp( + length(x), + 1 / par_list$coefficients[1] + ) + } + + x_sim[!replacable] <- 0 + + x_sim +} + + + +#' Adjustment of Operating Times by Delays in Registration using a Monte Carlo +#' Approach +#' +#' @description +#' `r lifecycle::badge("soft-deprecated")` +#' +#' `mcs_delay_register()` is no longer under active development, switching +#' to [mcs_delay] is recommended. +#' +#' @details +#' In general the amount of information about units in the field, that have not +#' failed yet, are rare. 
For example it is common that a supplier, who provides +#' parts to the automotive industry does not know when a vehicle was put in +#' service and therefore does not know the exact operating time of the supplied +#' parts. This function uses a Monte Carlo approach for simulating the operating +#' times of (multiple) right censored observations, taking account of registering +#' delays. The simulation is based on the distribution of operating times that were +#' calculated from complete data (see [dist_delay_register]). +#' +#' @inheritParams dist_delay_register +#' @param time A numeric vector of operating times. +#' @param status A vector of binary data (0 or 1) indicating whether unit *i* is +#' a right censored observation (= 0) or a failure (= 1). +#' @param distribution Supposed distribution of the random variable. Only +#' `"lognormal"` is implemented. +#' @param details A logical. If `FALSE` the output consists of a vector with +#' corrected operating times for the censored units and the input operating +#' times for the failed units. If `TRUE` the output consists of a detailed +#' list, i.e the same vector as described before, simulated random numbers and +#' estimated distribution parameters. +#' +#' @return A numeric vector of corrected operating times for the censored units +#' and the input operating times for the failed units if `details = FALSE`. +#' If `details = TRUE` the output is a list which consists of the following elements: +#' +#' * `time` : Numeric vector of corrected operating times for the censored +#' observations and input operating times for failed units. +#' * `x_sim` : Simulated random numbers of specified distribution with estimated +#' parameters. The length of `x_sim` is equal to the number of censored observations. +#' * `coefficients` : Estimated coefficients of supposed distribution. 
+#'
+#' @examples
+#' date_of_production <- c("2014-07-28", "2014-02-17", "2014-07-14",
+#'                         "2014-06-26", "2014-03-10", "2014-05-14",
+#'                         "2014-05-06", "2014-03-07", "2014-03-09",
+#'                         "2014-04-13", "2014-05-20", "2014-07-07",
+#'                         "2014-01-27", "2014-01-30", "2014-03-17",
+#'                         "2014-02-09", "2014-04-14", "2014-04-20",
+#'                         "2014-03-13", "2014-02-23", "2014-04-03",
+#'                         "2014-01-08", "2014-01-08")
+#' date_of_registration <- c(NA, "2014-03-29", "2014-12-06", "2014-09-09",
+#'                           NA, NA, "2014-06-16", NA, "2014-05-23",
+#'                           "2014-05-09", "2014-05-31", NA, "2014-04-13",
+#'                           NA, NA, "2014-03-12", NA, "2014-06-02",
+#'                           NA, "2014-03-21", "2014-06-19", NA, NA)
+#'
+#' op_time <- rep(1000, length(date_of_production))
+#' status <- c(0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 0, 1, 0, 0, 1, 0, 1, 0, 1, 1, 0, 0)
+#'
+#' # Example 1 - Simplified vector output:
+#' x_corrected <- mcs_delay_register(
+#'   date_prod = date_of_production,
+#'   date_register = date_of_registration,
+#'   time = op_time,
+#'   status = status,
+#'   distribution = "lognormal",
+#'   details = FALSE
+#' )
+#'
+#' # Example 2 - Detailed list output:
+#' list_detail <- mcs_delay_register(
+#'   date_prod = date_of_production,
+#'   date_register = date_of_registration,
+#'   time = op_time,
+#'   status = status,
+#'   distribution = "lognormal",
+#'   details = TRUE
+#' )
+#'
+#' @md
+#'
+#' @export
+mcs_delay_register <- function(date_prod,
+                               date_register,
+                               time,
+                               status,
+                               distribution = "lognormal",
+                               details = FALSE
+) {
+  deprecate_soft("2.0.0", "mcs_delay_register()", "mcs_delay()")
+
+  # Units without a registration date get their operating time adjusted; one
+  # random delay is drawn for each of them.
+  is_unregistered <- is.na(date_register)
+  n_unregistered <- sum(is_unregistered)
+
+  # Only pairs with both dates present enter the parameter estimation.
+  is_complete <- stats::complete.cases(date_prod) &
+    stats::complete.cases(date_register)
+
+  if (all(is_complete)) {
+    prod_date <- date_prod
+    register_date <- date_register
+  } else {
+    prod_date <- date_prod[is_complete]
+    register_date <- date_register[is_complete]
+  }
+
+  if (distribution != "lognormal") {
+    stop("No valid distribution!")
+  }
+
+  params <- dist_delay_register(
+    date_prod = prod_date,
+    date_register = register_date,
+    distribution = "lognormal"
+  )
+
+  x_sim <- stats::rlnorm(
+    n = n_unregistered,
+    meanlog = params[[1]],
+    sdlog = params[[2]]
+  )
+
+  # Subtract the simulated delay from the affected operating times only.
+  time[is_unregistered] <- time[is_unregistered] - x_sim
+
+  if (details == FALSE) {
+    return(time)
+  }
+
+  list(time = time, x_sim = x_sim, coefficients = params)
+}
+
+
+
+#' Adjustment of Operating Times by Delays in Report using a Monte Carlo Approach
+#'
+#' @description
+#' `r lifecycle::badge("soft-deprecated")`
+#'
+#' `mcs_delay_report()` is no longer under active development, switching to
+#' [mcs_delay] is recommended.
+#'
+#' @details
+#' The delay in report describes the time between the occurrence of a damage and
+#' the registration in the warranty database. For a given date where the analysis
+#' is made there could be units which had a failure but are not registered in the
+#' database and therefore treated as censored units. To overcome this problem
+#' this function uses a Monte Carlo approach for simulating the operating
+#' times of (multiple) right censored observations, taking account of reporting
+#' delays. The simulation is based on the distribution of operating times that were
+#' calculated from complete data, i.e. failed items (see [dist_delay_report]).
+#'
+#' @inheritParams dist_delay_report
+#' @inheritParams mcs_delay_register
+#'
+#' @return A numeric vector of corrected operating times for the censored units
+#' and the input operating times for the failed units if `details = FALSE`.
+#' If `details = TRUE` the output is a list which consists of the following
+#' elements:
+#'
+#' * `time` : Numeric vector of corrected operating times for the censored
+#'   observations and input operating times for failed units.
+#' * `x_sim` : Simulated random numbers of specified distribution with
+#'   estimated parameters. The length of `x_sim` is equal to the number of
+#'   censored observations.
+#' * `coefficients` : Estimated coefficients of supposed distribution.
+#'
+#' @examples
+#' date_of_repair <- c(NA, "2014-09-15", "2015-07-04", "2015-04-10", NA,
+#'                     NA, "2015-04-24", NA, "2015-04-25", "2015-04-24",
+#'                     "2015-06-12", NA, "2015-05-04", NA, NA,
+#'                     "2015-05-22", NA, "2015-09-17", NA, "2015-08-15",
+#'                     "2015-11-26", NA, NA)
+#'
+#' date_of_report <- c(NA, "2014-10-09", "2015-08-28", "2015-04-15", NA,
+#'                     NA, "2015-05-16", NA, "2015-05-28", "2015-05-15",
+#'                     "2015-07-11", NA, "2015-08-14", NA, NA,
+#'                     "2015-06-05", NA, "2015-10-17", NA, "2015-08-21",
+#'                     "2015-12-02", NA, NA)
+#'
+#' op_time <- rep(1000, length(date_of_repair))
+#' status <- c(0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 0, 1, 0, 0, 1, 0, 1, 0, 1, 1, 0, 0)
+#'
+#' # Example 1 - Simplified vector output:
+#' x_corrected <- mcs_delay_report(
+#'   date_repair = date_of_repair,
+#'   date_report = date_of_report,
+#'   time = op_time,
+#'   status = status,
+#'   distribution = "lognormal",
+#'   details = FALSE
+#' )
+#'
+#' # Example 2 - Detailed list output:
+#' list_detail <- mcs_delay_report(
+#'   date_repair = date_of_repair,
+#'   date_report = date_of_report,
+#'   time = op_time,
+#'   status = status,
+#'   distribution = "lognormal",
+#'   details = TRUE
+#' )
+#'
+#' @md
+#'
+#' @export
+mcs_delay_report <- function(date_repair,
+                             date_report,
+                             time,
+                             status,
+                             distribution = "lognormal",
+                             details = FALSE
+) {
+  deprecate_soft("2.0.0", "mcs_delay_report()", "mcs_delay()")
+
+  # Censored units (status 0) get their operating time adjusted; one random
+  # delay is drawn for each of them.
+  is_censored <- status == 0
+  n_censored <- sum(is_censored)
+
+  # Only pairs with both dates present enter the parameter estimation.
+  is_complete <- stats::complete.cases(date_repair) &
+    stats::complete.cases(date_report)
+
+  if (all(is_complete)) {
+    repair_date <- date_repair
+    report_date <- date_report
+  } else {
+    repair_date <- date_repair[is_complete]
+    report_date <- date_report[is_complete]
+  }
+
+  if (distribution != "lognormal") {
+    stop("No valid distribution!")
+  }
+
+  params <- dist_delay_report(
+    date_repair = repair_date,
+    date_report = report_date,
+    distribution = "lognormal"
+  )
+
+  x_sim <- stats::rlnorm(
+    n = n_censored,
+    meanlog = params[[1]],
+    sdlog = params[[2]]
+  )
+
+  # Subtract the simulated delay from the censored operating times only.
+  time[is_censored] <- time[is_censored] - x_sim
+
+  if (details == FALSE) {
+    return(time)
+  }
+
+  list(time = time, x_sim = x_sim, coefficients = params)
+}
+
+
+
+#' Adjustment of Operating Times by Delays using a Monte Carlo Approach
+#'
+#' @description
+#' `r lifecycle::badge("soft-deprecated")`
+#'
+#' `mcs_delays()` is no longer under active development, switching to [mcs_delay]
+#' is recommended.
+#'
+#' @details
+#' This function is a wrapper that combines both, [mcs_delay_register] and
+#' [mcs_delay_report] functions for the adjustment of operating times of censored units.
+#'
+#' @inheritParams mcs_delay_register
+#' @inheritParams dist_delay_report
+#'
+#' @return A numerical vector of corrected operating times for the censored units
+#' and the input operating times for the failed units if
+#' `details = FALSE`. If `details = TRUE` the output is a list which
+#' consists of the following elements:
+#'
+#' * `time` : A numeric vector of corrected operating times for the censored
+#'   observations and input operating times for failed units.
+#' * `x_sim_regist` : Simulated random numbers of specified distribution with
+#'   estimated parameters for delay in registration. The length of `x_sim_regist`
+#'   is equal to the number of censored observations.
+#' * `x_sim_report` : Simulated random numbers of specified distribution with
+#'   estimated parameters for delay in report. The length of `x_sim_report` is
+#'   equal to the number of censored observations.
+#' * `coefficients_regist` : Estimated coefficients of supposed distribution for
+#'   delay in registration.
+#' * `coefficients_report` : Estimated coefficients of supposed distribution for
+#'   delay in report.
+#'
+#' @examples
+#' date_of_production <- c("2014-07-28", "2014-02-17", "2014-07-14",
+#'                         "2014-06-26", "2014-03-10", "2014-05-14",
+#'                         "2014-05-06", "2014-03-07", "2014-03-09",
+#'                         "2014-04-13", "2014-05-20", "2014-07-07",
+#'                         "2014-01-27", "2014-01-30", "2014-03-17",
+#'                         "2014-02-09", "2014-04-14", "2014-04-20",
+#'                         "2014-03-13", "2014-02-23", "2014-04-03",
+#'                         "2014-01-08", "2014-01-08")
+#' date_of_registration <- c("2014-08-17", "2014-03-29", "2014-12-06",
+#'                           "2014-09-09", "2014-05-14", "2014-07-01",
+#'                           "2014-06-16", "2014-04-03", "2014-05-23",
+#'                           "2014-05-09", "2014-05-31", "2014-08-12",
+#'                           "2014-04-13", "2014-02-15", "2014-07-07",
+#'                           "2014-03-12", "2014-05-27", "2014-06-02",
+#'                           "2014-05-20", "2014-03-21", "2014-06-19",
+#'                           "2014-02-12", "2014-03-27")
+#' date_of_repair <- c(NA, "2014-09-15", "2015-07-04", "2015-04-10", NA,
+#'                     NA, "2015-04-24", NA, "2015-04-25", "2015-04-24",
+#'                     "2015-06-12", NA, "2015-05-04", NA, NA,
+#'                     "2015-05-22", NA, "2015-09-17", NA, "2015-08-15",
+#'                     "2015-11-26", NA, NA)
+#'
+#' date_of_report <- c(NA, "2014-10-09", "2015-08-28", "2015-04-15", NA,
+#'                     NA, "2015-05-16", NA, "2015-05-28", "2015-05-15",
+#'                     "2015-07-11", NA, "2015-08-14", NA, NA,
+#'                     "2015-06-05", NA, "2015-10-17", NA, "2015-08-21",
+#'                     "2015-12-02", NA, NA)
+#'
+#' op_time <- rep(1000, length(date_of_repair))
+#' status <- c(0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 0, 1, 0, 0, 1, 0, 1, 0, 1, 1, 0, 0)
+#'
+#' # Example 1 - Simplified vector output:
+#' x_corrected <- mcs_delays(
+#'   date_prod = date_of_production,
+#'   date_register = date_of_registration,
+#'   date_repair = date_of_repair,
+#'   date_report = date_of_report,
+#'   time = op_time,
+#'   status = status,
+#'   distribution = "lognormal",
+#'   details = FALSE
+#' )
+#'
+#' # Example 2 - Detailed list output:
+#' list_detail <- mcs_delays(
+#'   date_prod = date_of_production,
+#'   date_register = date_of_registration,
+#'   date_repair = date_of_repair,
+#'   date_report = date_of_report,
+#'   time = op_time,
+#'   status = status,
+#'   distribution = "lognormal",
+#'   details = TRUE
+#' )
+#'
+#' @md
+#'
+#' @export
+mcs_delays <- function(date_prod,
+                       date_register,
+                       date_repair,
+                       date_report,
+                       time,
+                       status,
+                       distribution = "lognormal",
+                       details = FALSE
+) {
+  deprecate_soft("2.0.0", "mcs_delays()", "mcs_delay()")
+
+  # Two independent corrections: units without a registration date receive a
+  # registration delay, censored units (status 0) receive a report delay.
+  is_unregistered <- is.na(date_register)
+  is_censored <- status == 0
+  n_unregistered <- sum(is_unregistered)
+  n_censored <- sum(is_censored)
+
+  # Only pairs with both dates present enter the respective estimation.
+  regist_complete <- stats::complete.cases(date_prod) &
+    stats::complete.cases(date_register)
+
+  if (all(regist_complete)) {
+    prod_date <- date_prod
+    register_date <- date_register
+  } else {
+    prod_date <- date_prod[regist_complete]
+    register_date <- date_register[regist_complete]
+  }
+
+  report_complete <- stats::complete.cases(date_repair) &
+    stats::complete.cases(date_report)
+
+  if (all(report_complete)) {
+    repair_date <- date_repair
+    report_date <- date_report
+  } else {
+    repair_date <- date_repair[report_complete]
+    report_date <- date_report[report_complete]
+  }
+
+  if (distribution != "lognormal") {
+    stop("No valid distribution!")
+  }
+
+  params_regist <- dist_delay_register(
+    date_prod = prod_date,
+    date_register = register_date,
+    distribution = "lognormal"
+  )
+  params_report <- dist_delay_report(
+    date_repair = repair_date,
+    date_report = report_date,
+    distribution = "lognormal"
+  )
+
+  # Registration delays are drawn before report delays; keep this order so a
+  # fixed seed yields the same random numbers.
+  x_sim_regist <- stats::rlnorm(
+    n = n_unregistered,
+    meanlog = params_regist[[1]],
+    sdlog = params_regist[[2]]
+  )
+  x_sim_report <- stats::rlnorm(
+    n = n_censored,
+    meanlog = params_report[[1]],
+    sdlog = params_report[[2]]
+  )
+
+  time[is_unregistered] <- time[is_unregistered] - x_sim_regist
+  time[is_censored] <- time[is_censored] - x_sim_report
+
+  if (details == FALSE) {
+    return(time)
+  }
+
+  list(
+    time = time,
+    x_sim_regist = x_sim_regist,
+    x_sim_report = x_sim_report,
+    coefficients_regist = params_regist,
+    coefficients_report = params_report
+  )
+}
diff --git a/R/mcs_mileage.R b/R/mcs_mileage.R
new file mode 100644
index 0000000..4bd76b4
--- /dev/null
+++ b/R/mcs_mileage.R
@@ -0,0 +1,241 @@
+#' Simulation of Unknown Covered Distances using a Monte Carlo Approach
+#'
+#'
@description +#' This function simulates distances for units where these are unknown. +#' +#' First, random numbers of the annual mileage distribution, estimated by +#' [dist_mileage], are drawn. Second, the drawn annual distances are +#' converted with respect to the actual operating times (in days) using a linear +#' relationship. See 'Details'. +#' +#' @details +#' **Assumption of linear relationship**: Imagine the distance of the vehicle +#' is unknown. A distance of 3500.25 kilometers (km) was drawn from the annual +#' distribution and the known operating time is 200 days (d). So the resulting +#' distance of this vehicle is +#' \deqn{3500.25 km \cdot (\frac{200 d} {365 d}) = 1917.945 km}{% +#' 3500.25 km * (200 d / 365 d) = 1917.945 km} +#' +#' @inheritParams dist_mileage +#' +#' @return A list with class `wt_mcs_mileage` containing the following elements: +#' +#' * `data` : A `tibble` returned by [mcs_mileage_data] where two modifications +#' have been made: +#' +#' * If the column `status` exists, the `tibble` has additional classes +#' `wt_mcs_data` and `wt_reliability_data`. Otherwise, the `tibble` only has +#' the additional class `wt_mcs_data` (which is not supported by [estimate_cdf]). +#' * The column `mileage` is renamed to `x` (to be in accordance with +#' [reliability_data]) and contains simulated distances for incomplete +#' observations and input distances for the complete observations. +#' * `sim_data` : A `tibble` with column `sim_mileage` that holds the simulated +#' distances for incomplete cases and `0` for complete cases. +#' * `model_estimation` : A list returned by [dist_mileage]. +#' +#' @seealso [dist_mileage] for the determination of a parametric annual mileage +#' distribution and [estimate_cdf] for the estimation of failure probabilities. 
+#' +#' @examples +#' # MCS data preparation: +#' mcs_tbl <- mcs_mileage_data( +#' field_data, +#' mileage = mileage, +#' time = dis, +#' status = status, +#' id = vin +#' ) +#' +#' # Example 1 - Reproducibility of drawn random numbers: +#' set.seed(1234) +#' mcs_distances <- mcs_mileage( +#' x = mcs_tbl, +#' distribution = "lognormal" +#' ) +#' +#' # Example 2 - MCS for distances with exponential annual mileage distribution: +#' mcs_distances_2 <- mcs_mileage( +#' x = mcs_tbl, +#' distribution = "exponential" +#' ) +#' +#' # Example 3 - MCS for distances with downstream probability estimation: +#' ## Apply 'estimate_cdf()' to *$data: +#' prob_estimation <- estimate_cdf( +#' x = mcs_distances$data, +#' methods = "kaplan" +#' ) +#' +#' ## Apply 'plot_prob()': +#' plot_prob_estimation <- plot_prob(prob_estimation) +#' +#' @md +#' +#' @export +mcs_mileage <- function(x, distribution, ...) { + UseMethod("mcs_mileage") +} + + + +#' @rdname mcs_mileage +#' +#' @export +mcs_mileage.wt_mcs_mileage_data <- function( + x, + distribution = c("lognormal", "exponential"), + ... 
+) { + + # Checks: + ## Check for distributions: + distribution <- match.arg(distribution) + + mileage <- x$mileage + time <- x$time + + mcs_mileage_( + data = x, + x = mileage, + time = time, + distribution = distribution + ) +} + + + +#' Simulation of Unknown Covered Distances using a Monte Carlo Approach +#' +#' +#' +#' @inherit mcs_mileage description details return seealso +#' +#' @inheritParams dist_mileage.default +#' @inheritParams mcs_mileage_data +#' +#' @examples +#' # Example 1 - Reproducibility of drawn random numbers: +#' set.seed(1234) +#' mcs_distances <- mcs_mileage( +#' x = field_data$mileage, +#' time = field_data$dis, +#' status = field_data$status, +#' id = field_data$vin, +#' distribution = "lognormal" +#' ) +#' +#' # Example 2 - MCS for distances with exponential annual mileage distribution: +#' mcs_distances_2 <- mcs_mileage( +#' x = field_data$mileage, +#' time = field_data$dis, +#' status = field_data$status, +#' id = field_data$vin, +#' distribution = "exponential" +#' ) +#' +#' # Example 3 - MCS for distances with downstream probability estimation: +#' ## Apply 'estimate_cdf()' to *$data: +#' prob_estimation <- estimate_cdf( +#' x = mcs_distances$data, +#' methods = "kaplan" +#' ) +#' +#' ## Apply 'plot_prob()': +#' plot_prob_estimation <- plot_prob(prob_estimation) +#' +#' @md +#' +#' @export +mcs_mileage.default <- function(x, + time, + status = NULL, + id = paste0("ID", seq_len(length(time))), + distribution = c("lognormal", "exponential"), + ... 
+) { + + # Checks: + ## Check for distributions: + distribution <- match.arg(distribution) + + ## Check for different length in time and x: + if (length(x) != length(time)) { + stop("Elements of 'x' and 'time' differ in length!") + } + + mcs_mileage_( + x = x, + time = time, + status = status, + id = id, + distribution = distribution + ) +} + + + +# Helper function that performs MCS for distances: +mcs_mileage_ <- function(data = NULL, + x, # vector + time, # vector, + status = NULL, + id = NULL, + distribution +) { + + # Step 1: Parameter estimation using complete cases: + par_list <- dist_mileage.default( + x = x, + time = time, + distribution = distribution + ) + + # Step 2: Simulation of random numbers: + sim_nums <- mcs_helper( + x = x, + par_list = par_list + ) + + ## Imputation of missing mileages: + x[is.na(x)] <- (sim_nums[is.na(x)] * time[is.na(x)]) / 365 + + # Step 3: Create output: + ## Create MCS_Mileage_Data and renaming 'mileage' to 'x': + ## vector-based: + if (purrr::is_null(data)) { + data_tbl <- mcs_mileage_data( + mileage = x, + time = time, + status = status, + id = id + ) + } else { + ## data-based: only 'mileage' must be updated! 
+ data_tbl <- dplyr::mutate(data, mileage = x) + } + + data_tbl <- dplyr::rename(data_tbl, x = mileage) + + ## Set class and attribute w.r.t status; remove class "wt_mcs_mileage_data": + if ("status" %in% names(data_tbl)) { + class(data_tbl) <- c("wt_reliability_data", "wt_mcs_data", class(data_tbl)[-1]) + attr(data_tbl, "characteristic") <- "mileage" + } else { + class(data_tbl) <- c("wt_mcs_data", class(data_tbl)[-1]) + } + + # Remove attribute "mcs_characteristic": + attr(data_tbl, "mcs_characteristic") <- NULL + + mcs_output <- list( + data = data_tbl, + sim_data = tibble::tibble(sim_mileage = sim_nums), + model_estimation = list( + mileage_distribution = par_list + ) + ) + + class(mcs_output) <- c("wt_mcs_mileage", class(mcs_output)) + + mcs_output +} diff --git a/R/mileage_distribution.R b/R/mileage_distribution.R index 4428cd3..edabae5 100644 --- a/R/mileage_distribution.R +++ b/R/mileage_distribution.R @@ -8,120 +8,190 @@ #' estimated with maximum likelihood. See 'Details' for more information. #' #' @details -#' The distribution parameter(s) are determined on the basis of complete cases, -#' i.e. there is no \code{NA} in one of the related vector elements -#' \code{c(mileage[i], time[i])}. Distances and operating times less than or equal -#' to zero are not considered as well. +#' The distribution parameter(s) is (are) determined on the basis of complete +#' cases, i.e. there is no `NA` (row-wise) in one of the related columns `mileage` +#' and `time`. Distances and operating times less than or equal to zero are not +#' considered as well. 
#' -#' \strong{Assumption of linear relationship}: Imagine a component in a vehicle +#' **Assumption of linear relationship**: Imagine a component in a vehicle #' has endured a distance of 25000 kilometers (km) in 500 days (d), the annual #' distance of this unit is \deqn{25000 km \cdot (\frac{365 d} {500 d}) = 18250 km}{% #' 25000 km * (365 d / 500 d) = 18250 km} #' -#' @param mileage A numeric vector of distances covered. Use \code{NA} for missing -#' elements. -#' @param time A numeric vector of operating times. Use \code{NA} for missing -#' elements. -#' @param distribution Supposed distribution of the random variable. +#' @param x A `tibble` of class `wt_mcs_mileage_data` returned by [mcs_mileage_data]. +#' @param distribution Supposed distribution of the annual mileage. +#' @template dots #' -#' @return A list of class \code{mileage_estimation} which contains: -#' \itemize{ -#' \item \code{coefficients} : A named vector of estimated parameter(s). -#' \item \code{miles_annual} : A numeric vector of element-wise computed annual -#' distances using the linear relationship described in 'Details'. -#' \item \code{distribution} : Specified distribution. 
-#' } +#' @return A list with class `wt_mileage_estimation` which contains: #' -#' @examples -#' # Data for examples: -#' date_of_registration <- c("2014-08-17", "2014-03-29", "2014-12-06", -#' "2014-09-09", "2014-05-14", "2014-07-01", -#' "2014-06-16", "2014-04-03", "2014-05-23", -#' "2014-05-09", "2014-05-31", "2014-08-12", -#' "2014-04-13", "2014-02-15", "2014-07-07", -#' "2014-03-12", "2014-05-27", "2014-06-02", -#' "2014-05-20", "2014-03-21", "2014-06-19", -#' "2014-02-12", "2014-03-27") -#' date_of_repair <- c(NA, "2014-09-15", "2015-07-04", "2015-04-10", NA, -#' NA, "2015-04-24", NA, "2015-04-25", "2015-04-24", -#' "2015-06-12", NA, "2015-05-04", NA, NA, "2015-05-22", -#' NA, "2015-09-17", NA, "2015-08-15", "2015-11-26", -#' NA, NA) -#' date_of_analysis <- "2015-12-31" +#' * `coefficients` : A named vector of estimated parameter(s). +#' * `miles_annual` : A numeric vector of element-wise computed annual distances +#' using the linear relationship described in 'Details'. +#' * `distribution` : Specified distribution. #' -#' ## Assume that mileage is only known for units that have failed (date_of_repair != NA). -#' mileage <- c(NA, 15655, 13629, 18292, NA, NA, 33555, NA, 21737, -#' 29870, 21068, NA, 122283, NA, NA, 36088, NA, 11153, -#' NA, 122842, 20349, NA, NA) +#' @examples +#' # MCS data preparation: +#' mcs_tbl <- mcs_mileage_data( +#' field_data, +#' mileage = mileage, +#' time = dis, +#' status = status, +#' id = vin +#' ) #' -#' ## time in service is the difference between repair and registration for failed -#' ## items and the difference between date of analysis and date of registration -#' ## for intact units. 
-#' time_in_service <- difftime( -#' as.Date(date_of_repair, format = "%Y-%m-%d"), -#' as.Date(date_of_registration, format = "%Y-%m-%d"), -#' units = "days" +#' # Example 1 - Assuming lognormal annual mileage distribution: +#' params_mileage_annual <- dist_mileage( +#' x = mcs_tbl, +#' distribution = "lognormal" #' ) -#' time_in_service[is.na(time_in_service)] <- difftime( -#' as.Date(date_of_analysis, format = "%Y-%m-%d"), -#' as.Date(date_of_registration[is.na(time_in_service)], format = "%Y-%m-%d"), -#' units = "days" +#' +#' # Example 2 - Assuming exponential annual mileage distribution: +#' params_mileage_annual_2 <- dist_mileage( +#' x = mcs_tbl, +#' distribution = "exponential" #' ) -#' time_in_service <- as.numeric(time_in_service) #' +#' @md +#' +#' @export +dist_mileage <- function(x, distribution, ...) { + UseMethod("dist_mileage") +} + + + +#' @rdname dist_mileage +#' +#' @export +dist_mileage.wt_mcs_mileage_data <- function( + x, + distribution = c("lognormal", "exponential"), + ... +) { + + mileage <- x$mileage + time <- x$time + + # Use default method: + dist_mileage.default( + x = mileage, + time = time, + distribution = distribution + ) +} + + + +#' Parameter Estimation of an Annual Mileage Distribution +#' +#' @inherit dist_mileage description return +#' +#' @details +#' The distribution parameter(s) is (are) determined on the basis of complete cases, +#' i.e. there is no `NA` in one of the related vector elements +#' `c(mileage[i], time[i])`. Distances and operating times less than or equal +#' to zero are not considered as well. +#' +#' **Assumption of linear relationship**: Imagine a component in a vehicle +#' has endured a distance of 25000 kilometers (km) in 500 days (d), the annual +#' distance of this unit is \deqn{25000 km \cdot (\frac{365 d} {500 d}) = 18250 km}{% +#' 25000 km * (365 d / 500 d) = 18250 km} +#' +#' @inheritParams dist_mileage +#' @param x A numeric vector of distances covered. Use `NA` for missing elements. 
+#' @param time A numeric vector of operating times. Use `NA` for missing elements. +#' +#' @seealso [dist_mileage] +#' +#' @examples #' # Example 1 - Assuming lognormal annual mileage distribution: #' params_mileage_annual <- dist_mileage( -#' mileage = mileage, -#' time = time_in_service, +#' x = field_data$mileage, +#' time = field_data$dis, #' distribution = "lognormal" #' ) #' #' # Example 2 - Assuming exponential annual mileage distribution: #' params_mileage_annual_2 <- dist_mileage( -#' mileage = mileage, -#' time = time_in_service, +#' x = field_data$mileage, +#' time = field_data$dis, #' distribution = "exponential" #' ) #' +#' @md +#' #' @export -dist_mileage <- function(mileage, - time, - distribution = c("lognormal", "exponential") +dist_mileage.default <- function(x, + time, + distribution = c("lognormal", "exponential"), + ... ) { + # Checks: + ## Distribution check: distribution <- match.arg(distribution) - # Check for negative mileage, stop if TRUE: - if (any(mileage < 0, na.rm = TRUE)) { - stop("There is at least one negative element in argument 'mileage'!") + ## Check for negative mileage, stop if TRUE: + if (any(x < 0, na.rm = TRUE)) { + stop( + "Elements with negative distances are not meaningful and must be removed!" + ) } + # Do dist_mileage_(): + dist_mileage_( + x = x, + time = time, + distribution = distribution + ) +} + + + +# Helper function that performs the estimation of an annual mileage distribution: +dist_mileage_ <- function(x, + time, + distribution +) { + # Defining annual distance variable (for estimation) and origin variable (output): - miles_annual <- miles_annual_origin <- (mileage / time) * 365 + miles_annual <- miles_annual_origin <- (x / time) * 365 # Checks: - ## case of Inf, i.e. x is 0: could be handled with `is.infinite()` + + ## case of Inf, i.e. 
time is 0: could be handled with `is.infinite()` if (any(is.infinite(miles_annual))) { - warning("At least one computed annual distance is infinite and is ignored", - " for the estimation step!") + warning( + "At least one computed annual distance is infinite and is ignored ", + "for the estimation step!" + ) miles_annual <- miles_annual[!is.infinite(miles_annual)] } ## all NA: if (all(is.na(miles_annual))) { - stop("All computed annual distances are NA. No parameters can be estimated!") + stop( + "All computed annual distances are 'NA'. No parameters can be estimated!" + ) } + ## any or all annual distances are smaller or equal to zero: if (any(miles_annual <= 0, na.rm = TRUE)) { + if (all(miles_annual <= 0, na.rm = TRUE)) { ### all: - stop("All computed annual distances are smaller or equal to 0. No", - " parameters can be estimated!") + stop( + "All computed annual distances are smaller or equal to 0. ", + "No parameters can be estimated!" + ) } else { ### any: - warning("At least one computed annual distance is smaller or equal to 0", - " and is ignored for the estimation step!") + warning( + "At least one computed annual distance is smaller or equal to 0 ", + "and is ignored for the estimation step!" + ) + miles_annual <- miles_annual[miles_annual > 0] } } @@ -147,209 +217,22 @@ dist_mileage <- function(mileage, distribution = distribution ) - class(dist_output) <- c("mileage_estimation", class(dist_output)) + class(dist_output) <- c("wt_mileage_estimation", class(dist_output)) return(dist_output) } -#' Simulation of Unknown Covered Distances using a Monte Carlo Approach -#' -#' @description -#' This function simulates distances for units where these are unknown, i.e. -#' \code{mileage = NA}. -#' -#' First, random numbers of the annual mileage distribution, estimated by -#' \code{\link{dist_mileage}}, are drawn. Second, the drawn annual distances are -#' converted with respect to the actual operating times (in days) using a linear -#' relationship. 
See 'Details'. -#' -#' @details -#' \strong{Assumption of linear relationship}: Imagine the distance of the vehicle -#' is unknown. A distance of 3500.25 kilometers (km) was drawn from the annual -#' distribution and the known operating time is 200 days (d). So the resulting -#' distance of this vehicle is \deqn{3500.25 km \cdot (\frac{200 d} {365 d}) = 1917.945 km}{% -#' 3500.25 km * (200 d / 365 d) = 1917.945 km} -#' -#' @inheritParams dist_mileage -#' @param status Optional argument. If used it has to be a vector of binary data -#' (0 or 1) indicating whether unit i is a right censored observation (= 0) or -#' a failure (= 1). The effect of status on the return is described in 'Value'. -#' @param id A vector for the identification of every unit. -#' -#' @return A list containing the following elements: -#' \itemize{ -#' \item \code{data} : A tibble with classes \code{wt_mcs_data} and -#' \code{wt_reliability_data} if \code{status} is provided. Since the -#' class \code{wt_reliability_data} enables the direct usage of -#' \code{data} inside -#' \code{\link[=estimate_cdf]{estimate_cdf.wt_reliability_data}}, the -#' required lifetime characteristic is automatically set to the distance -#' \code{mileage}. -#' -#' If \code{status = NULL} class is \code{wt_mcs_data}, which is not -#' supported by \code{estimate_cdf} due to missing \code{status}. -#' -#' The tibble contains the following columns: -#' \itemize{ -#' \item \code{x} : Simulated distances for unknown \code{mileage} and -#' input distances for known \code{mileage}. -#' \item \code{time} : Input operating times. -#' \item \code{status} (\strong{optional}) : -#' \itemize{ -#' \item If argument \code{status = NULL} column \code{status} does -#' not exist. -#' \item If argument \code{status} is provided the column contains -#' the entered binary data (0 or 1). -#' } -#' \item \code{id} : Identification of every unit. 
-#' } -#' \item \code{sim_data} : A tibble with column \code{sim_mileage} that holds the -#' simulated distances for unknown \code{mileage} and \code{0} otherwise. -#' \item \code{model_estimation} : A list containing a named list -#' (\code{"mileage_distribution"}) with output of \code{\link{dist_mileage}}. -#' } -#' -#' @seealso \code{\link{estimate_cdf}} -#' -#' @examples -#' # Data for examples: -#' date_of_registration <- c("2014-08-17", "2014-03-29", "2014-12-06", -#' "2014-09-09", "2014-05-14", "2014-07-01", -#' "2014-06-16", "2014-04-03", "2014-05-23", -#' "2014-05-09", "2014-05-31", "2014-08-12", -#' "2014-04-13", "2014-02-15", "2014-07-07", -#' "2014-03-12", "2014-05-27", "2014-06-02", -#' "2014-05-20", "2014-03-21", "2014-06-19", -#' "2014-02-12", "2014-03-27") -#' date_of_repair <- c(NA, "2014-09-15", "2015-07-04", "2015-04-10", NA, -#' NA, "2015-04-24", NA, "2015-04-25", "2015-04-24", -#' "2015-06-12", NA, "2015-05-04", NA, NA, "2015-05-22", -#' NA, "2015-09-17", NA, "2015-08-15", "2015-11-26", -#' NA, NA) -#' date_of_analysis <- "2015-12-31" -#' -#' ## Assume that mileage is only known for units that have failed (date_of_repair != NA). -#' mileage <- c(NA, 15655, 13629, 18292, NA, NA, 33555, NA, 21737, -#' 29870, 21068, NA, 122283, NA, NA, 36088, NA, 11153, -#' NA, 122842, 20349, NA, NA) -#' -#' ## time in service is the difference between repair and registration for failed -#' ## items and the difference between date of analysis and date of registration -#' ## for intact units. 
-#' time_in_service <- difftime( -#' as.Date(date_of_repair, format = "%Y-%m-%d"), -#' as.Date(date_of_registration, format = "%Y-%m-%d"), -#' units = "days" -#' ) -#' time_in_service[is.na(time_in_service)] <- difftime( -#' as.Date(date_of_analysis, format = "%Y-%m-%d"), -#' as.Date(date_of_registration[is.na(time_in_service)], format = "%Y-%m-%d"), -#' units = "days" -#' ) -#' time_in_service <- as.numeric(time_in_service) -#' -#' # Example 1 - Reproducibility of drawn random numbers: -#' set.seed(1234) -#' mcs_distances <- mcs_mileage( -#' mileage = mileage, -#' time = time_in_service, -#' distribution = "lognormal" -#' ) -#' -#' # Example 2 - MCS for distances assuming a exponential annual mileage distribution: -#' mcs_distances_2 <- mcs_mileage( -#' mileage = mileage, -#' time = time_in_service, -#' distribution = "exponential" -#' ) -#' -#' status <- ifelse(!is.na(date_of_repair), 1, 0) -#' -#' # Example 3 - MCS for distances using status: -#' mcs_distances_3 <- mcs_mileage( -#' mileage = mileage, -#' time = time_in_service, -#' status = status, -#' distribution = "lognormal" -#' ) -#' -#' ## Using result of *$data in estimate_cdf() -#' prob_estimation <- estimate_cdf( -#' x = mcs_distances_3$data, -#' methods = "kaplan" -#' ) -#' -#' plot_prob_estimation <- plot_prob(prob_estimation) -#' #' @export -mcs_mileage <- function(mileage, - time, - status = NULL, - id = paste0("ID", seq_len(length(time))), - distribution = c("lognormal", "exponential") +print.wt_mileage_estimation <- function(x, + digits = max( + 3L, + getOption("digits") - 3L + ), + ... 
) { - - # Checks: - ## Check for distributions: - distribution <- match.arg(distribution) - - ## Check for different length in time and mileage: - if (length(mileage) != length(time)) { - stop("Elements of 'mileage' and 'time' differ in lengths!") - } - - # Step 1: Parameter estimation using complete cases: - par_list <- dist_mileage( - mileage = mileage, - time = time, - distribution = distribution - ) - - # Step 2: Simulation of random numbers: - sim_nums <- mcs_helper( - x = mileage, - par_list = par_list - ) - - ## Imputation of missing mileages: - mileage[is.na(mileage)] <- (sim_nums[is.na(mileage)] * time[is.na(mileage)]) / 365 - - # Defining data_tbl with class "wt_mcs_data" and/or "wt_reliability_data": - if (purrr::is_null(status)) { - data_tbl <- tibble::tibble( - x = mileage, - time = time, - id = id - ) - class(data_tbl) <- c("wt_mcs_data", class(data_tbl)) - - } else { - # check for status: - if (!is_status(status)) { - stop("'status' must be numeric with elements 0 or 1!") - } - - data_tbl <- tibble::tibble( - x = mileage, - time = time, - status = status, - id = id - ) - - class(data_tbl) <- c("wt_mcs_data", "wt_reliability_data", class(data_tbl)) - - attr(data_tbl, "characteristic") <- "mileage" - } - - mcs_output <- list( - data = data_tbl, - sim_data = tibble::tibble(sim_mileage = sim_nums), - model_estimation = list( - mileage_distribution = par_list - ) - ) - - return(mcs_output) + cat("Coefficients:\n") + print(format(stats::coef(x), digits = digits), print.gap = 2L, quote = FALSE) + invisible(x) } diff --git a/R/utils.R b/R/utils.R index 226b0ee..1d43caf 100644 --- a/R/utils.R +++ b/R/utils.R @@ -1,3 +1,23 @@ two_parametric <- function(distribution) { sub("3", "", distribution) } + + + +check_dots <- function(...) { + dots <- list(...) + + if (length(dots) > 1) { + stop( + "'...' is not used. Arguments must be matched by name!", + call. 
= FALSE + ) + } +} + +check_dates <- function(mydate, + date.format = "%Y-%m-%d" +){ + tryCatch(!is.na(as.Date(mydate, date.format)), + error = function(err) {FALSE}) +} diff --git a/_pkgdown.yml b/_pkgdown.yml index 315e13e..471f67b 100644 --- a/_pkgdown.yml +++ b/_pkgdown.yml @@ -55,14 +55,18 @@ reference: - dist_delay - dist_delay.default - mcs_delay + - mcs_delay.default - dist_mileage + - dist_mileage.default - mcs_mileage + - mcs_mileage.default - title: "Data Sets" desc: "Data sets used in the examples" - contents: - alloy - shock - voltage + - field_data - title: "Deprecated" desc: > The following functions are deprecated and will be removed in a future diff --git a/docs/articles/Life_Data_Analysis_Part_I.html b/docs/articles/Life_Data_Analysis_Part_I.html index d2e9cb9..4e5268d 100644 --- a/docs/articles/Life_Data_Analysis_Part_I.html +++ b/docs/articles/Life_Data_Analysis_Part_I.html @@ -93,7 +93,7 @@

A Non-parametric Approach

Tim-Gunnar Hensel

David Barkemeyer

-

2021-02-01

+

2021-02-08

Source: vignettes/Life_Data_Analysis_Part_I.Rmd @@ -114,7 +114,7 @@

But rank distributions are systematically skewed distributions and thus the median value instead of the expected value \(E\left[F\left(t_i\right)\right] = \frac{i}{n + 1}\) is used for the estimation 1. This skewness is visualized in Figure 1.

 library(dplyr) # data manipulation 
-library(ggplot2) # visualization
+library(ggplot2) # visualization
 
 x <- seq(0, 1, length.out = 100) # CDF
 n <- 10 # sample size
@@ -190,7 +190,7 @@ 

 shock_tbl <- reliability_data(data = shock, x = distance, status = status)
 shock_tbl
-#> Reliability Data:
+#> Reliability Data with characteristic x: 'distance':
 #> # A tibble: 38 x 3
 #>        x status id   
 #>    <int>  <dbl> <chr>
diff --git a/docs/articles/Life_Data_Analysis_Part_II.html b/docs/articles/Life_Data_Analysis_Part_II.html
index ff5c3d0..08dbbad 100644
--- a/docs/articles/Life_Data_Analysis_Part_II.html
+++ b/docs/articles/Life_Data_Analysis_Part_II.html
@@ -93,7 +93,7 @@ 

Rank Regression and Maximum Likelihood

Tim-Gunnar Hensel

David Barkemeyer

-

2021-02-01

+

2021-02-08

Source: vignettes/Life_Data_Analysis_Part_II.Rmd @@ -157,7 +157,7 @@

 shock_tbl <- reliability_data(data = shock, x = distance, status = status)
 shock_tbl
-#> Reliability Data:
+#> Reliability Data with characteristic x: 'distance':
 #> # A tibble: 38 x 3
 #>        x status id   
 #>    <int>  <dbl> <chr>
@@ -182,7 +182,7 @@ 

# Data: alloy_tbl <- reliability_data(data = alloy, x = cycles, status = status) alloy_tbl -#> Reliability Data: +#> Reliability Data with characteristic x: 'cycles': #> # A tibble: 72 x 3 #> x status id #> <dbl> <dbl> <chr> diff --git a/docs/articles/Life_Data_Analysis_Part_III.html b/docs/articles/Life_Data_Analysis_Part_III.html index 4b7db3d..4c1b36f 100644 --- a/docs/articles/Life_Data_Analysis_Part_III.html +++ b/docs/articles/Life_Data_Analysis_Part_III.html @@ -93,7 +93,7 @@

Segmented Regression and EM Algorithm

Tim-Gunnar Hensel

David Barkemeyer

-

2021-02-01

+

2021-02-08

Source: vignettes/Life_Data_Analysis_Part_III.Rmd @@ -113,7 +113,7 @@

 voltage_tbl <- reliability_data(data = voltage, x = hours, status = status)
 voltage_tbl
-#> Reliability Data:
+#> Reliability Data with characteristic x: 'hours':
 #> # A tibble: 58 x 3
 #>        x status id   
 #>    <dbl>  <dbl> <chr>
diff --git a/docs/news/index.html b/docs/news/index.html
index 3460155..4451b51 100644
--- a/docs/news/index.html
+++ b/docs/news/index.html
@@ -139,37 +139,102 @@ 

Breaking Changes

+
+

+Parametric Models

  • -confint_betabinom() and confint_fisher(): Removed constant features distribution, bounds and direction from the tibble output and added them as attributes instead.
  • -
  • plot_prob.wt_model(): Removed dysfunctional argument distribution. The distribution is inferred using the model x.
  • +
+
+
+

+Confidence Intervals

+ +
+
+

+Monte Carlo Simulation

+
+

New Features

+
+

+Confidence Intervals

+
+

+Monte Carlo Simulation

+ +
+

Minor Improvements and bug fixes

+
+

+Reliability Data

+
+

+Confidence Intervals

+ +
+
+

+Monte Carlo Simulation

+
    +
  • The object returned by mcs_mileage() now has class wt_mcs_mileage.
  • +
  • The object returned by mcs_delay() now has class wt_mcs_delay.
  • +
  • The object returned by dist_mileage() now has class wt_mileage_estimation.
  • +
  • The object returned by dist_delay() now has class wt_delay_estimation or wt_delay_estimation_list.
  • +
+
+

Documentation improvements

@@ -207,9 +272,9 @@

plot_prob_mix(): Deprecated, use plot_prob() instead. Removed default value NULL for argument mix_output. Renamed event with status.

-
+

-Parametric Models

+Parametric Models

  • ml_estimation.default() (former ml_estimation()): Renamed event with status. Removed details. Changed names and contents of list elements in output. See ?ml_estimation.
  • @@ -233,9 +298,9 @@

    plot_pop(): Added argument tol to restrict the range of failure probabilities. Removed argument color. Renamed argument params to dist_params_tbl, which only supports location and scale parameters (also for distribution = "weibull"). Changed behavior of dist_params_tbl: A tibble is now recommended instead of a vector.

-
+

-Confidence Intervals

+Confidence Intervals

-
+

-Monte Carlo Simulation

+Monte Carlo Simulation

  • dist_delay_register(): Deprecated, use dist_delay() instead.
  • diff --git a/docs/pkgdown.yml b/docs/pkgdown.yml index eed1b46..4a0dfe4 100644 --- a/docs/pkgdown.yml +++ b/docs/pkgdown.yml @@ -5,5 +5,5 @@ articles: Life_Data_Analysis_Part_I: Life_Data_Analysis_Part_I.html Life_Data_Analysis_Part_II: Life_Data_Analysis_Part_II.html Life_Data_Analysis_Part_III: Life_Data_Analysis_Part_III.html -last_built: 2021-02-01T16:33Z +last_built: 2021-02-08T11:04Z diff --git a/docs/reference/dist_delay.default.html b/docs/reference/dist_delay.default.html new file mode 100644 index 0000000..ba394d0 --- /dev/null +++ b/docs/reference/dist_delay.default.html @@ -0,0 +1,260 @@ + + + + + + + + +Parameter Estimation of a Delay Distribution — dist_delay.default • weibulltools + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    +
    + + + + +
    + +
    +
    + + +
    +

    This function models a delay (in days) random variable (e.g. in logistic, +registration, report) using a supposed continuous distribution. First, the +element-wise differences in days of both vectors date_1 and date_2 are +calculated and then the parameter(s) of the assumed +distribution is (are) estimated with maximum likelihood. See 'Details' for +more information.

    +
    + +
    # S3 method for default
    +dist_delay(..., date_1, date_2, distribution = c("lognormal", "exponential"))
    + +

    Arguments

    + + + + + + + + + + + + + + + + + + +
    ...

    Further arguments passed to or from other methods. Currently not used.

    date_1

    A vector of class character or Date, in the format "yyyy-mm-dd", +representing the earlier of the two dates belonging to a particular delay. +Use NA for missing elements.

    +

    If more than one delay is to be considered, use a list where the first element +is the earlier date of the first delay, the second element is the earlier date +of the second delay, and so forth (see 'Examples').

    date_2

    A vector of class character or Date in the format "yyyy-mm-dd". +date_2 is the counterpart of date_1 and is used the same as date_1, just with +the later date(s) of the particular delay(s). Use NA for missing elements.

    distribution

    Supposed distribution of the respective delay.

    + +

    Value

    + +

    A list with class wt_delay_estimation which contains:

      +
    • coefficients : A named vector of estimated parameter(s).

    • +
    • delay : A numeric vector of element-wise computed differences in days.

    • +
    • distribution : Specified distribution.

    • +
    + +

    If more than one delay was considered, the resulting output is a list with class +wt_delay_estimation_list. In this case each list element has class +wt_delay_estimation and the items listed above, are included.

    +

    Details

    + +

    The distribution parameter(s) is (are) determined on the basis of complete +cases, i.e. there is no NA in one of the related vector elements +c(date_1[i], date_2[i]). Time differences less than or equal to zero are +not considered as well.

    +

    See also

    + + + +

    Examples

    +
    # Example 1 - Delay in registration: +params_delay_regist <- dist_delay( + date_1 = field_data$production_date, + date_2 = field_data$registration_date, + distribution = "lognormal" +) + +# Example 2 - Delay in report: +params_delay_report <- dist_delay( + date_1 = field_data$repair_date, + date_2 = field_data$report_date, + distribution = "exponential" +) +
    #> Warning: At least one of the date differences is smaller or equal to 0 and is ignored for the estimation step!
    +# Example 3 - Delays in registration and report with same distribution: +params_delays <- dist_delay( + date_1 = list(field_data$production_date, field_data$repair_date), + date_2 = list(field_data$registration_date, field_data$report_date), + distribution = "lognormal" +) +
    #> Warning: At least one of the date differences is smaller or equal to 0 and is ignored for the estimation step!
    +# Example 4 - Delays in registration and report with different distributions: +params_delays_2 <- dist_delay( + date_1 = list(field_data$production_date, field_data$repair_date), + date_2 = list(field_data$registration_date, field_data$report_date), + distribution = c("lognormal", "exponential") +) +
    #> Warning: At least one of the date differences is smaller or equal to 0 and is ignored for the estimation step!
    +
    +
    + +
    + + +
    + + +
    +

    Site built with pkgdown 1.6.1.

    +
    + +
    +
    + + + + + + + + diff --git a/docs/reference/dist_delay.html b/docs/reference/dist_delay.html index 5b8d223..8614cd7 100644 --- a/docs/reference/dist_delay.html +++ b/docs/reference/dist_delay.html @@ -42,10 +42,9 @@ +row-wise differences in days of the related date columns are calculated and then +the parameter(s) of the assumed distribution is (are) estimated with maximum +likelihood. See 'Details' for more information." /> @@ -142,92 +141,106 @@

    Parameter Estimation of a Delay Distribution

    This function models a delay (in days) random variable (e.g. in logistic, registration, report) using a supposed continuous distribution. First, the -element-wise differences in days of both vectors date_1 and -date_2 are calculated and then the parameter(s) of the assumed -distribution are estimated with maximum likelihood. See 'Details' for more -information.

    +row-wise differences in days of the related date columns are calculated and then +the parameter(s) of the assumed distribution is (are) estimated with maximum +likelihood. See 'Details' for more information.

    -
    dist_delay(date_1, date_2, distribution = c("lognormal", "exponential"))
    +
    dist_delay(...)
    +
    +# S3 method for wt_mcs_delay_data
    +dist_delay(..., x, distribution = c("lognormal", "exponential"))

    Arguments

    - - + + - - + + - +
    date_1

    A vector of class character or Date, in the -format "yyyy-mm-dd", indicating the earlier of the two dates. Use NA -for missing elements.

    ...

    Further arguments passed to or from other methods. Currently not used.

    date_2

    A vector of class character or Date, in the -format "yyyy-mm-dd", indicating the later of the two dates. Use NA -for missing elements.

    x

    A tibble of class wt_mcs_delay_data returned by mcs_delay_data.

    distribution

    Supposed distribution of the random variable.

    Supposed distribution of the respective delay.

    Value

    -

    A list of class delay_estimation which contains:

      +

      A list with class wt_delay_estimation which contains:

      • coefficients : A named vector of estimated parameter(s).

      • -
      • delay : A numeric vector of element-wise computed differences - in days.

      • +
      • delay : A numeric vector of element-wise computed differences in days.

      • distribution : Specified distribution.

      +

      If more than one delay was considered in mcs_delay_data, the resulting output +is a list with class wt_delay_estimation_list. In this case each list element +has class wt_delay_estimation and includes the items listed above.

      Details

      -

      The distribution parameter(s) are determined on the basis of complete cases, -i.e. there is no NA in one of the related vector elements -c(date_1[i], date_2[i]). Time differences less than or equal to zero are -not considered as well.

      +

      The distribution parameter(s) is (are) determined on the basis of complete +cases, i.e. there is no NA (row-wise) in one of the related date columns. +Time differences less than or equal to zero are not considered as well.

      Examples

      -
      # Example 1 - Delay in registration: -date_of_production <- c("2014-07-28", "2014-02-17", "2014-07-14", - "2014-06-26", "2014-03-10", "2014-05-14", - "2014-05-06", "2014-03-07", "2014-03-09", - "2014-04-13", "2014-05-20", "2014-07-07", - "2014-01-27", "2014-01-30", "2014-03-17", - "2014-02-09", "2014-04-14", "2014-04-20", - "2014-03-13", "2014-02-23", "2014-04-03", - "2014-01-08", "2014-01-08") -date_of_registration <- c(NA, "2014-03-29", "2014-12-06", "2014-09-09", - NA, NA, "2014-06-16", NA, "2014-05-23", - "2014-05-09", "2014-05-31", NA, "2014-04-13", - NA, NA, "2014-03-12", NA, "2014-06-02", - NA, "2014-03-21", "2014-06-19", NA, NA) +
      # MCS data preparation: +## Data for delay in registration: +mcs_tbl_1 <- mcs_delay_data( + field_data, + date_1 = production_date, + date_2 = registration_date, + time = dis, + status = status, + id = vin +) + +## Data for delay in report: +mcs_tbl_2 <- mcs_delay_data( + field_data, + date_1 = repair_date, + date_2 = report_date, + time = dis, + status = status, + id = vin +) + +## Data for both delays: +mcs_tbl_both <- mcs_delay_data( + field_data, + date_1 = c(production_date, repair_date), + date_2 = c(registration_date, report_date), + time = dis, + status = status, + id = vin +) +# Example 1 - Delay in registration: params_delay_regist <- dist_delay( - date_1 = date_of_production, - date_2 = date_of_registration, + x = mcs_tbl_1, distribution = "lognormal" ) # Example 2 - Delay in report: -date_of_repair <- c(NA, "2014-09-15", "2015-07-04", "2015-04-10", NA, - NA, "2015-04-24", NA, "2015-04-25", "2015-04-24", - "2015-06-12", NA, "2015-05-04", NA, NA, - "2015-05-22", NA, "2015-09-17", NA, "2015-08-15", - "2015-11-26", NA, NA) - -date_of_report <- c(NA, "2014-10-09", "2015-08-28", "2015-04-15", NA, - NA, "2015-05-16", NA, "2015-05-28", "2015-05-15", - "2015-07-11", NA, "2015-08-14", NA, NA, - "2015-06-05", NA, "2015-10-17", NA, "2015-08-21", - "2015-12-02", NA, NA) - params_delay_report <- dist_delay( - date_1 = date_of_repair, - date_2 = date_of_report, + x = mcs_tbl_2, distribution = "exponential" ) - +
      #> Warning: At least one of the date differences is smaller or equal to 0 and is ignored for the estimation step!
      +# Example 3 - Delays in registration and report with same distribution: +params_delays <- dist_delay( + x = mcs_tbl_both, + distribution = "lognormal" +) +
      #> Warning: At least one of the date differences is smaller or equal to 0 and is ignored for the estimation step!
      +# Example 4 - Delays in registration and report with different distributions: +params_delays_2 <- dist_delay( + x = mcs_tbl_both, + distribution = c("lognormal", "exponential") +) +
      #> Warning: At least one of the date differences is smaller or equal to 0 and is ignored for the estimation step!
      -
      dist_mileage(mileage, time, distribution = c("lognormal", "exponential"))
      +
      dist_mileage(x, distribution, ...)
      +
      +# S3 method for wt_mcs_mileage_data
      +dist_mileage(x, distribution = c("lognormal", "exponential"), ...)

      Arguments

      - - + + - - + + - - + +
      mileage

      A numeric vector of distances covered. Use NA for missing -elements.

      x

      A tibble of class wt_mcs_mileage_data returned by mcs_mileage_data.

      time

      A numeric vector of operating times. Use NA for missing -elements.

      distribution

      Supposed distribution of the annual mileage.

      distribution

      Supposed distribution of the random variable.

      ...

      Further arguments passed to or from other methods. Currently not used.

      Value

      -

      A list of class mileage_estimation which contains:

        +

        A list with class wt_mileage_estimation which contains:

        • coefficients : A named vector of estimated parameter(s).

        • -
        • miles_annual : A numeric vector of element-wise computed annual - distances using the linear relationship described in 'Details'.

        • +
        • miles_annual : A numeric vector of element-wise computed annual distances +using the linear relationship described in 'Details'.

        • distribution : Specified distribution.

        Details

        -

        The distribution parameter(s) are determined on the basis of complete cases, -i.e. there is no NA in one of the related vector elements -c(mileage[i], time[i]). Distances and operating times less than or equal -to zero are not considered as well.

        +

        The distribution parameter(s) is (are) determined on the basis of complete +cases, i.e. there is no NA (row-wise) in one of the related columns mileage +and time. Distances and operating times less than or equal to zero are not +considered as well.

        Assumption of linear relationship: Imagine a component in a vehicle - has endured a distance of 25000 kilometers (km) in 500 days (d), the annual - distance of this unit is $$25000 km \cdot (\frac{365 d} {500 d}) = 18250 km$$

        +has endured a distance of 25000 kilometers (km) in 500 days (d), the annual +distance of this unit is $$25000 km \cdot (\frac{365 d} {500 d}) = 18250 km$$

        Examples

        -
        # Data for examples: -date_of_registration <- c("2014-08-17", "2014-03-29", "2014-12-06", - "2014-09-09", "2014-05-14", "2014-07-01", - "2014-06-16", "2014-04-03", "2014-05-23", - "2014-05-09", "2014-05-31", "2014-08-12", - "2014-04-13", "2014-02-15", "2014-07-07", - "2014-03-12", "2014-05-27", "2014-06-02", - "2014-05-20", "2014-03-21", "2014-06-19", - "2014-02-12", "2014-03-27") -date_of_repair <- c(NA, "2014-09-15", "2015-07-04", "2015-04-10", NA, - NA, "2015-04-24", NA, "2015-04-25", "2015-04-24", - "2015-06-12", NA, "2015-05-04", NA, NA, "2015-05-22", - NA, "2015-09-17", NA, "2015-08-15", "2015-11-26", - NA, NA) -date_of_analysis <- "2015-12-31" - -## Assume that mileage is only known for units that have failed (date_of_repair != NA). -mileage <- c(NA, 15655, 13629, 18292, NA, NA, 33555, NA, 21737, - 29870, 21068, NA, 122283, NA, NA, 36088, NA, 11153, - NA, 122842, 20349, NA, NA) - -## time in service is the difference between repair and registration for failed -## items and the difference between date of analysis and date of registration -## for intact units. -time_in_service <- difftime( - as.Date(date_of_repair, format = "%Y-%m-%d"), - as.Date(date_of_registration, format = "%Y-%m-%d"), - units = "days" -) -time_in_service[is.na(time_in_service)] <- difftime( - as.Date(date_of_analysis, format = "%Y-%m-%d"), - as.Date(date_of_registration[is.na(time_in_service)], format = "%Y-%m-%d"), - units = "days" +
        # MCS data preparation: +mcs_tbl <- mcs_mileage_data( + field_data, + mileage = mileage, + time = dis, + status = status, + id = vin ) -time_in_service <- as.numeric(time_in_service) # Example 1 - Assuming lognormal annual mileage distribution: params_mileage_annual <- dist_mileage( - mileage = mileage, - time = time_in_service, + x = mcs_tbl, distribution = "lognormal" ) # Example 2 - Assuming exponential annual mileage distribution: params_mileage_annual_2 <- dist_mileage( - mileage = mileage, - time = time_in_service, + x = mcs_tbl, distribution = "exponential" ) diff --git a/docs/reference/estimate_cdf.default.html b/docs/reference/estimate_cdf.default.html index 9e9afdf..222a057 100644 --- a/docs/reference/estimate_cdf.default.html +++ b/docs/reference/estimate_cdf.default.html @@ -188,17 +188,16 @@

        Arg

        Value

        -

        A tibble containing the following columns:

          +

          A tibble with class wt_cdf_estimation containing the following columns:

          • id : Identification for every unit.

          • x : Lifetime characteristic.

          • -
          • status : Binary data (0 or 1) indicating whether a unit is a - right censored observation (= 0) or a failure (= 1).

          • -
          • rank : The (computed) ranks. Determined for methods "mr" - and "johnson", filled with NA for other methods or if - status = 0.

          • +
          • status : Binary data (0 or 1) indicating whether a unit is a right +censored observation (= 0) or a failure (= 1).

          • +
          • rank : The (computed) ranks. Determined for methods "mr" and "johnson", +filled with NA for other methods or if status = 0.

          • prob : Estimated failure probabilities, NA if status = 0.

          • -
          • cdf_estimation_method : Specified method for the estimation of - failure probabilities.

          • +
          • cdf_estimation_method : Specified method for the estimation of failure +probabilities.

          Details

          @@ -247,7 +246,7 @@

          R December 3, 2020

          See also

          - +

          Examples

          # Vectors: diff --git a/docs/reference/estimate_cdf.html b/docs/reference/estimate_cdf.html index 7c71d28..80e24a5 100644 --- a/docs/reference/estimate_cdf.html +++ b/docs/reference/estimate_cdf.html @@ -157,7 +157,7 @@

          Arg x -

          A tibble returned by reliability_data.

          +

          A tibble of class wt_reliability_data returned by reliability_data.

          ... @@ -165,9 +165,8 @@

          Arg methods -

          One or multiple methods of "mr", "johnson", -"kaplan" or "nelson" used for the estimation of failure -probabilities. See 'Details'.

          +

          One or multiple methods of "mr", "johnson", "kaplan" or +"nelson" used for the estimation of failure probabilities. See 'Details'.

          options @@ -177,17 +176,16 @@

          Arg

          Value

          -

          A tibble containing the following columns:

            +

            A tibble with class wt_cdf_estimation containing the following columns:

            • id : Identification for every unit.

            • x : Lifetime characteristic.

            • -
            • status : Binary data (0 or 1) indicating whether a unit is a - right censored observation (= 0) or a failure (= 1).

            • -
            • rank : The (computed) ranks. Determined for methods "mr" - and "johnson", filled with NA for other methods or if - status = 0.

            • +
            • status : Binary data (0 or 1) indicating whether a unit is a right +censored observation (= 0) or a failure (= 1).

            • +
            • rank : The (computed) ranks. Determined for methods "mr" and "johnson", +filled with NA for other methods or if status = 0.

            • prob : Estimated failure probabilities, NA if status = 0.

            • -
            • cdf_estimation_method : Specified method for the estimation of - failure probabilities.

            • +
            • cdf_estimation_method : Specified method for the estimation of failure +probabilities.

            Details

            diff --git a/docs/reference/field_data.html b/docs/reference/field_data.html new file mode 100644 index 0000000..4c6569f --- /dev/null +++ b/docs/reference/field_data.html @@ -0,0 +1,235 @@ + + + + + + + + +Field Data — field_data • weibulltools + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
            +
            + + + + +
            + +
            +
            + + +
            +

            An illustrative field dataset that contains a variety of variables commonly +collected in the automotive sector.

            +

            The dataset has complete information about failed and incomplete information +about intact vehicles. See 'Format' and 'Details' for further insights.

            +
            + +
            field_data
            + + +

            Format

            + +

            A tibble with 10,684 rows and 20 variables:

            +
            vin

            Vehicle identification number.

            +
            dis

            Days in service.

            +
            mileage

            Distances covered, which are unknown for censored units.

            +
            status

            1 for failed and 0 for censored units.

            +
            production_date

            Date of production.

            +
            registration_date

            Date of registration. Known for all failed units and + for a few intact units.

            +
            repair_date

            The date on which the failure was repaired. It is assumed + that the repair date is equal to the date of failure occurrence.

            +
            report_date

            The date on which lifetime information about the failure + was available.

            +
            country

            Delivering country.

            +
            region

            The region within the country of delivery. Known for registered + vehicles, NA for units with a missing registration_date.

            +
            climatic_zone

            Climatic zone based on "Köppen-Geiger" climate classification. + Known for registered vehicles, NA for units with a missing registration_date.

            +
            climatic_subzone

            Climatic subzone based on "Köppen-Geiger" climate classification. + Known for registered vehicles, NA for units with a missing registration_date.

            +
            brand

            Brand of the vehicle.

            +
            vehicle_model

            Model of the vehicle.

            +
            engine_type

            Type of the engine.

            +
            engine_date

            Date on which the engine was installed.

            +
            gear_type

            Type of the gear.

            +
            gear_date

            Date on which the gear was installed.

            +
            transmission

            Transmission of the vehicle.

            +
            fuel

            Vehicle fuel.

            + +
            + +

            Details

            + +

            All vehicles were produced in 2014 and an analysis of the field data was +made at the end of 2015. At the date of analysis, there were 684 failed and +10,000 intact vehicles.

            +

            Censored vehicles:

            +

            For censored units the service time (dis) was computed as the difference +of the date of analysis "2015-12-31" and the registration_date.

            +

            For many units the latter date is unknown. For these, the difference of the +analysis date and production_date was used to get a rough estimation of +the real service time. This uncertainty has to be considered in the subsequent +analysis (see delay in registration in the section 'Details' of +mcs_delay).

            +

            Furthermore, due to the delay in report, the computed service time could also +be inaccurate. This uncertainty should be considered as well (see +delay in report in the section 'Details' of mcs_delay).

            +

            The lifetime characteristic mileage is unknown for all censored units. +If an analysis is to be made for this lifetime characteristic, covered distances +for these units have to be estimated (see mcs_mileage).

            +

            Failed vehicles: +For failed units the service time (dis) is computed as the difference +of repair_date and registration_date, which are known for all of them.

            +

            See also

            + + + +
            + +
            + + +
            + + +
            +

            Site built with pkgdown 1.6.1.

            +
            + +
            +
            + + + + + + + + diff --git a/docs/reference/index.html b/docs/reference/index.html index 12e6346..1ca09b0 100644 --- a/docs/reference/index.html +++ b/docs/reference/index.html @@ -158,6 +158,31 @@

            +

            MCS Data

            +

            Functions for converting data into MCS data

            + + + + + + + + + + +

            mcs_delay_data()

            + +

            MCS Delay Data

            + + + +

            mcs_mileage_data()

            + +

            MCS Mileage Data

            + @@ -385,22 +410,46 @@

            dist_delay(<default>)

            + +

            Parameter Estimation of a Delay Distribution

            + +

            mcs_delay()

            Adjustment of Operating Times by Delays using a Monte Carlo Approach

            + +

            mcs_delay(<default>)

            + +

            Adjustment of Operating Times by Delays using a Monte Carlo Approach

            + +

            dist_mileage()

            Parameter Estimation of an Annual Mileage Distribution

            + +

            dist_mileage(<default>)

            + +

            Parameter Estimation of an Annual Mileage Distribution

            + +

            mcs_mileage()

            Simulation of Unknown Covered Distances using a Monte Carlo Approach

            + + + +

            mcs_mileage(<default>)

            + +

            Simulation of Unknown Covered Distances using a Monte Carlo Approach

            @@ -432,6 +481,12 @@

            voltage

            High Voltage Stress Test for the Dielectric Insulation of Generator armature bars

            + + + +

            field_data

            + +

            Field Data

            diff --git a/docs/reference/mcs_delay.default.html b/docs/reference/mcs_delay.default.html new file mode 100644 index 0000000..0ddb894 --- /dev/null +++ b/docs/reference/mcs_delay.default.html @@ -0,0 +1,352 @@ + + + + + + + + +Adjustment of Operating Times by Delays using a Monte Carlo Approach — mcs_delay.default • weibulltools + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
            +
            + + + + +
            + +
            +
            + + +
            +

            In general, the amount of available information about units in the field is very +different. During the warranty period, there are only a few cases with complete +data (mainly failed units) but lots of cases with incomplete data (usually +censored units). As a result, the operating time of units with incomplete +information is often inaccurate and must be adjusted by delays.

            +

            This function reduces the operating times of incomplete observations by simulated +delays (in days). A unit is considered as incomplete if the later of the +related dates is unknown. See 'Details' for some practical examples.

            +

            Random delay numbers are drawn from the distribution determined by complete cases +(described in 'Details' of dist_delay).

            +
            + +
            # S3 method for default
            +mcs_delay(
            +  ...,
            +  date_1,
            +  date_2,
            +  time,
            +  status = NULL,
            +  id = paste0("ID", seq_len(length(time))),
            +  distribution = c("lognormal", "exponential")
            +)
            + +

            Arguments

            + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
            ...

            Further arguments passed to or from other methods. Currently not used.

            date_1

            A vector of class character or Date, in the format "yyyy-mm-dd", +representing the earlier of the two dates belonging to a particular delay. +Use NA for missing elements.

            +

            If more than one delay is to be considered, use a list where the first element +is the earlier date of the first delay, the second element is the earlier date +of the second delay, and so forth (see 'Examples').

            date_2

            A vector of class character or Date in the format "yyyy-mm-dd". +date_2 is the counterpart of date_1 and is used the same as date_1, just with +the later date(s) of the particular delay(s). Use NA for missing elements.

            time

            Operating times. Use NA for missing elements.

            status

            Optional argument. If used, it must contain binary data +(0 or 1) indicating whether a unit is a right censored observation (= 0) or a +failure (= 1).

            +

            If status is provided, class wt_reliability_data is assigned to the +output of mcs_delay, which enables the direct application of estimate_cdf +on operating times.

            id

            Identification of every unit.

            distribution

            Supposed distribution of the respective delay.

            + +

            Value

            + +

            A list with class wt_mcs_delay containing the following elements:

              +
            • data : A tibble returned by mcs_delay_data where two modifications +have been made:

                +
              • If the column status exists, the tibble has additional classes +wt_mcs_data and wt_reliability_data. Otherwise, the tibble only has +the additional class wt_mcs_data (which is not supported by estimate_cdf).

              • +
              • The column time is renamed to x (to be in accordance with +reliability_data) and contains the adjusted operating times for incomplete +observations and input operating times for the complete observations.

              • +
            • +
            • sim_data : A tibble with column sim_delay that holds the simulated +delay-specific numbers for incomplete cases and 0 for complete cases. +If more than one delay was considered multiple columns with names sim_delay_1, +sim_delay_2, ..., sim_delay_i and corresponding delay-specific random +numbers are presented.

            • +
            • model_estimation : A list returned by dist_delay.

            • +
            + +

            Details

            + +

            In field data analysis time-dependent characteristics (e.g. time in service) +are often imprecisely recorded. These inaccuracies are caused by unconsidered delays.

            +

            For a better understanding of the MCS application in the context of field data, +two cases are described below.

              +
            • Delay in registration: It is common that a supplier that provides +parts to the manufacturing industry does not know when the unit, in which +its parts are installed, was put in service (due to unknown registration or +sales date (date_2)). Without taking the described delay into account, the +time in service of the failed units would be the difference between the +repair date and the production date (date_1) and for intact units the +difference between the present date and the production date. But the real +operating times are (much) shorter, since the stress on the components has +not started until the whole system was put in service. Hence, the operating times of units with +incomplete data (missing date_2) must be reduced by the delays.

            • +
            • Delay in report: Authorized repairers often do not immediately +notify the manufacturer or OEM of repairs that were made during the warranty +period, but instead pass the information about these repairs in collected +forms e.g. weekly, monthly or quarterly. The resulting time difference between +the reporting (date_2) of the repair in the guarantee database and the +actual repair date (date_1), which is often assumed to be the failure +date, is called the reporting delay. For a given date where the analysis +is made there could be units which had a failure but the failure isn't +reported and therefore they are treated as censored units. In order to take +this into account and according to the principle of equal opportunities, the +lifetime of units with missing report date (date_2[i] = NA) is reduced by +simulated reporting delays.

            • +
            + +

            References

            + +

            Verband der Automobilindustrie e.V. (VDA); Qualitätsmanagement in +der Automobilindustrie. Zuverlässigkeitssicherung bei Automobilherstellern +und Lieferanten. Zuverlässigkeits-Methoden und -Hilfsmittel.; 4th Edition, 2016, +ISSN:0943-9412

            +

            See also

            + +

            dist_delay for the determination of a parametric delay distribution +and estimate_cdf for the estimation of failure probabilities.

            + +

            Examples

            +
            # Example 1 - MCS for delay in registration: +mcs_regist <- mcs_delay( + date_1 = field_data$production_date, + date_2 = field_data$registration_date, + time = field_data$dis, + status = field_data$status, + distribution = "lognormal" +) + +# Example 2 - MCS for delay in report: +mcs_report <- mcs_delay( + date_1 = field_data$repair_date, + date_2 = field_data$report_date, + time = field_data$dis, + status = field_data$status, + distribution = "exponential" +) +
            #> Warning: At least one of the date differences is smaller or equal to 0 and is ignored for the estimation step!
            +# Example 3 - Reproducibility of random numbers: +set.seed(1234) +mcs_report_reproduce <- mcs_delay( + date_1 = field_data$repair_date, + date_2 = field_data$report_date, + time = field_data$dis, + status = field_data$status, + distribution = "exponential" +) +
            #> Warning: At least one of the date differences is smaller or equal to 0 and is ignored for the estimation step!
            +# Example 4 - MCS for delays in registration and report with same distribution: +mcs_delays <- mcs_delay( + date_1 = list(field_data$production_date, field_data$repair_date), + date_2 = list(field_data$registration_date, field_data$report_date), + time = field_data$dis, + status = field_data$status, + distribution = "lognormal" +) +
            #> Warning: At least one of the date differences is smaller or equal to 0 and is ignored for the estimation step!
            +# Example 5 - MCS for delays in registration and report with different distributions: +## Assuming lognormal registration and exponential reporting delays. +mcs_delays_2 <- mcs_delay( + date_1 = list(field_data$production_date, field_data$repair_date), + date_2 = list(field_data$registration_date, field_data$report_date), + time = field_data$dis, + status = field_data$status, + distribution = c("lognormal", "exponential") +) +
            #> Warning: At least one of the date differences is smaller or equal to 0 and is ignored for the estimation step!
            +
            +
            + +
            + + +
            + + +
            +

            Site built with pkgdown 1.6.1.

            +
            + +
            +
            + + + + + + + + diff --git a/docs/reference/mcs_delay.html b/docs/reference/mcs_delay.html index 445df52..0e5be91 100644 --- a/docs/reference/mcs_delay.html +++ b/docs/reference/mcs_delay.html @@ -46,8 +46,8 @@ censored units). As a result, the operating time of units with incomplete information is often inaccurate and must be adjusted by delays. This function reduces the operating times of incomplete observations by simulated -delays (in days). A unit is considered as incomplete if the later of the two -dates is unknown, i.e. date_2 = NA. See 'Details' for some practical examples. +delays (in days). A unit is considered as incomplete if the later of the +related dates is unknown. See 'Details' for some practical examples. Random delay numbers are drawn from the distribution determined by complete cases (described in 'Details' of dist_delay)." /> @@ -139,7 +139,7 @@
            @@ -150,109 +150,52 @@

            Adjustment of Operating Times by Delays using a Monte Carlo Approach

            censored units). As a result, the operating time of units with incomplete information is often inaccurate and must be adjusted by delays.

            This function reduces the operating times of incomplete observations by simulated -delays (in days). A unit is considered as incomplete if the later of the two -dates is unknown, i.e. date_2 = NA. See 'Details' for some practical examples.

            +delays (in days). A unit is considered as incomplete if the later of the +related dates is unknown. See 'Details' for some practical examples.

            Random delay numbers are drawn from the distribution determined by complete cases -(described in 'Details' of dist_delay).

            +(described in 'Details' of dist_delay).

            -
            mcs_delay(
            -  date_1,
            -  date_2,
            -  time,
            -  status = NULL,
            -  id = paste0("ID", seq_len(length(time))),
            -  distribution = c("lognormal", "exponential")
            -)
            +
            mcs_delay(...)
            +
            +# S3 method for wt_mcs_delay_data
            +mcs_delay(..., x, distribution = c("lognormal", "exponential"))

            Arguments

            - - - - - - + + - - - - - - - - - - + + - +
            date_1

            A vector of class character or Date, in the - format "yyyy-mm-dd", indicating the earlier of the two dates. Use NA - for missing elements.

            -

            If more than one delay should be considered it must be a list where the first - element contains the earlier dates of the first delay and the second element - contains the earlier dates of the second delay, and so forth.(See 'Examples').

            date_2

            A vector of class character or Date, in the - format "yyyy-mm-dd", indicating the later of the two dates. Use NA - for missing elements.

            -

            If more than one delay should be considered it must be a list where the first - element contains the later dates of the first delay and the second element - contains the later dates of the second delay, and so forth. (See 'Examples').

            ...

            Further arguments passed to or from other methods. Currently not used.

            time

            A numeric vector of operating times. Use NA for missing elements.

            status

            Optional argument. If used it has to be a vector of binary -data (0 or 1) indicating whether unit i is a right censored observation -(= 0) or a failure (= 1). The effect of status on the return is described -in 'Value'.

            id

            A vector for the identification of every unit.

            x

            A tibble of class wt_mcs_delay_data returned by mcs_delay_data.

            distribution

            Supposed distribution of the delay random variable. If more -than one delay is to be considered and different distributions are assumed -for each delay, the argument distribution must have the same length -as list date_1 (and date_2). For example, in the case of -two delays with different distributions, one has to specify the argument as -distribution = c("lognormal", "exponential"). Then the lognormal -distribution is applied to the first delay and the exponential distribution -to the second (See 'Examples').

            Supposed distribution of the respective delay.

            Value

            -

            A list containing the following elements:

              -
            • data : A tibble with classes wt_mcs_data and - wt_reliability_data if status is provided. Since the - class wt_reliability_data enables the direct usage of - data inside - estimate_cdf.wt_reliability_data, the - required lifetime characteristic is automatically set to the operating - time time.

              -

              If status = NULL class is wt_mcs_data, which is not - supported by estimate_cdf due to missing status.

              -

              The tibble contains the following columns:

                -
              • date_1 : Earlier dates. If argument date_1 is a list - of length i, i > 1 (described in Arguments) multiple - columns with names date_1.1, date_1.2, ..., date_1.i - and the corresponding values of the earlier dates are used.

              • -
              • date_2 : Later dates. In the case of a list with length greater - than 1, the routine described above is used.

              • -
              • x : Adjusted operating times for incomplete observations - and input operating times for the complete observations.

              • -
              • status (optional) :

                  -
                • If argument status = NULL column status does - not exist.

                • -
                • If argument status is provided the column contains - the entered binary data (0 or 1).

                • -
              • -
              • id : Identification of every unit.

              • +

                A list with class wt_mcs_delay containing the following elements:

                  +
                • data : A tibble returned by mcs_delay_data where two modifications +have been made:

                    +
                  • If the column status exists, the tibble has additional classes +wt_mcs_data and wt_reliability_data. Otherwise, the tibble only has +the additional class wt_mcs_data (which is not supported by estimate_cdf).

                  • +
                  • The column time is renamed to x (to be in accordance with +reliability_data) and contains the adjusted operating times for incomplete +observations and input operating times for the complete observations.

                • -
                • sim_data : A tibble with column sim_delay that holds the - simulated delay-specific numbers for incomplete cases and 0 for - complete cases. If more than one delay was considered multiple columns - sim_delay.1, sim_delay.2, ..., sim_delay.i with - corresponding delay-specific random numbers are presented.

                • -
                • model_estimation : A list containing a named list - ("delay_distribution") with output of dist_delay. For - multiple delays the list contains as many lists as there are delays, i.e. - ("delay_distribution.1", "delay_distribution.2", ..., - "delay_distribution.i").

                • +
                • sim_data : A tibble with column sim_delay that holds the simulated +delay-specific numbers for incomplete cases and 0 for complete cases. +If more than one delay was considered, multiple columns with names sim_delay_1, +sim_delay_2, ..., sim_delay_i and corresponding delay-specific random +numbers are presented.

                • +
                • model_estimation : A list returned by dist_delay.

                Details

                @@ -262,119 +205,104 @@

                Details

                For a better understanding of the MCS application in the context of field data, two cases are described below.

                • Delay in registration: It is common that a supplier, which provides - parts to the manufacturing industry does not know when the unit, in which - its parts are installed, were put in service (due to unknown date_2, - i.e. registration or sales date). Without taking the described delay into - account, the time in service of the failed units would be the difference - between the repair date and date_1 (i.e. the production date) and for - intact units the difference between the present date and date_1. But - the real operating times are (much) shorter, since the stress on the - components have not started until the whole systems were put in service. - Hence, units with incomplete data (missing date_2) must be reduced by - the delays.

                • -
                • Delay in report: Authorized repairers often do not immediately - notify the manufacturer or OEM of repairs that were made during the warranty - period, but instead pass the information about these repairs in collected - forms e.g. weekly, monthly or quarterly. The resulting time difference between - the reporting (date_2) of the repair in the guarantee database and the - actual repair date (date_1), which is often assumed to be the failure - date, is called the reporting delay. For a given date where the analysis - is made there could be units which had a failure but are not registered - and therefore treated as censored units. In order to take this case into - account and according to the principle of equal opportunities, the lifetime - of units with no report date (date_2 = NA) is reduced by simulated - reporting delays.

                +parts to the manufacturing industry, does not know when the unit, in which +its parts are installed, was put in service (due to unknown registration or +sales date (date_2)). Without taking the described delay into account, the +time in service of the failed units would be the difference between the +repair date and the production date (date_1) and for intact units the +difference between the present date and the production date. But the real +operating times are (much) shorter, since the stress on the components has +not started until the whole systems were put in service. Hence, the operating times of units with +incomplete data (missing date_2) must be reduced by the delays.

                  +
                • Delay in report: Authorized repairers often do not immediately +notify the manufacturer or OEM of repairs that were made during the warranty +period, but instead pass the information about these repairs in collected +forms e.g. weekly, monthly or quarterly. The resulting time difference between +the reporting (date_2) of the repair in the guarantee database and the +actual repair date (date_1), which is often assumed to be the failure +date, is called the reporting delay. For a given date where the analysis +is made there could be units which had a failure, but the failure isn't +reported and therefore they are treated as censored units. In order to take +this into account and according to the principle of equal opportunities, the +lifetime of units with missing report date (date_2[i] = NA) is reduced by +simulated reporting delays.

                References

                Verband der Automobilindustrie e.V. (VDA); Qualitätsmanagement in - der Automobilindustrie. Zuverlässigkeitssicherung bei Automobilherstellern - und Lieferanten. Zuverlässigkeits-Methoden und -Hilfsmittel.; 4th Edition, 2016, - <ISSN:0943-9412>

                +der Automobilindustrie. Zuverlässigkeitssicherung bei Automobilherstellern +und Lieferanten. Zuverlässigkeits-Methoden und -Hilfsmittel.; 4th Edition, 2016, +ISSN:0943-9412

                See also

                - +

                dist_delay for the determination of a parametric delay distribution +and estimate_cdf for the estimation of failure probabilities.

                Examples

                -
                # Data for examples: -date_of_production <- c("2014-07-28", "2014-02-17", "2014-07-14", - "2014-06-26", "2014-03-10", "2014-05-14", - "2014-05-06", "2014-03-07", "2014-03-09", - "2014-04-13", "2014-05-20", "2014-07-07", - "2014-01-27", "2014-01-30", "2014-03-17", - "2014-02-09", "2014-04-14", "2014-04-20", - "2014-03-13", "2014-02-23", "2014-04-03", - "2014-01-08", "2014-01-08") - -date_of_registration <- c(NA, "2014-03-29", "2014-12-06", "2014-09-09", - NA, NA, "2014-06-16", NA, "2014-05-23", - "2014-05-09", "2014-05-31", NA, "2014-04-13", - NA, NA, "2014-03-12", NA, "2014-06-02", - NA, "2014-03-21", "2014-06-19", NA, NA) - -date_of_repair <- c(NA, "2014-09-15", "2015-07-04", "2015-04-10", NA, - NA, "2015-04-24", NA, "2015-04-25", "2015-04-24", - "2015-06-12", NA, "2015-05-04", NA, NA, - "2015-05-22", NA, "2015-09-17", NA, "2015-08-15", - "2015-11-26", NA, NA) - -date_of_report <- c(NA, "2014-10-09", "2015-08-28", "2015-04-15", NA, - NA, "2015-05-16", NA, "2015-05-28", "2015-05-15", - "2015-07-11", NA, "2015-08-14", NA, NA, - "2015-06-05", NA, "2015-10-17", NA, "2015-08-21", - "2015-12-02", NA, NA) - -time_in_service <- rep(1000, length(date_of_production)) -status <- c(0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 0, 1, 0, 0, 1, 0, 1, 0, 1, 1, 0, 0) +
                # MCS data preparation: +## Data for delay in registration: +mcs_tbl_1 <- mcs_delay_data( + field_data, + date_1 = production_date, + date_2 = registration_date, + time = dis, + status = status, + id = vin +) + +## Data for delay in report: +mcs_tbl_2 <- mcs_delay_data( + field_data, + date_1 = repair_date, + date_2 = report_date, + time = dis, + status = status, + id = vin +) + +## Data for both delays: +mcs_tbl_both <- mcs_delay_data( + field_data, + date_1 = c(production_date, repair_date), + date_2 = c(registration_date, report_date), + time = dis, + status = status, + id = vin +) # Example 1 - MCS for delay in registration: mcs_regist <- mcs_delay( - date_1 = date_of_production, - date_2 = date_of_registration, - time = time_in_service, - status = status, + x = mcs_tbl_1, distribution = "lognormal" ) # Example 2 - MCS for delay in report: mcs_report <- mcs_delay( - date_1 = date_of_repair, - date_2 = date_of_report, - time = time_in_service, - status = status, + x = mcs_tbl_2, distribution = "exponential" ) - +
                #> Warning: At least one of the date differences is smaller or equal to 0 and is ignored for the estimation step!
                # Example 3 - Reproducibility of random numbers: set.seed(1234) mcs_report_reproduce <- mcs_delay( - date_1 = date_of_repair, - date_2 = date_of_report, - time = time_in_service, - status = status, + x = mcs_tbl_2, distribution = "exponential" ) - +
                #> Warning: At least one of the date differences is smaller or equal to 0 and is ignored for the estimation step!
                # Example 4 - MCS for delays in registration and report with same distribution: mcs_delays <- mcs_delay( - date_1 = list(date_of_production, date_of_repair), - date_2 = list(date_of_registration, date_of_report), - time = time_in_service, - status = status, + x = mcs_tbl_both, distribution = "lognormal" ) - +
                #> Warning: At least one of the date differences is smaller or equal to 0 and is ignored for the estimation step!
                # Example 5 - MCS for delays in registration and report with different distributions: ## Assuming lognormal registration and exponential reporting delays. mcs_delays_2 <- mcs_delay( - date_1 = list(date_of_production, date_of_repair), - date_2 = list(date_of_registration, date_of_report), - time = time_in_service, - status = status, + x = mcs_tbl_both, distribution = c("lognormal", "exponential") ) - +
                #> Warning: At least one of the date differences is smaller or equal to 0 and is ignored for the estimation step!