Skip to content

Commit

Permalink
improve dtw using a vector-based algorithm
Browse files Browse the repository at this point in the history
  • Loading branch information
M3nin0 committed Mar 3, 2024
1 parent f5cf45b commit 77f391e
Show file tree
Hide file tree
Showing 7 changed files with 145 additions and 104 deletions.
4 changes: 4 additions & 0 deletions R/RcppExports.R
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,10 @@ C_kernel_modal <- function(x, ncols, nrows, band, window_size) {
.Call(`_sits_C_kernel_modal`, x, ncols, nrows, band, window_size)
}

dtw2vec <- function(x, y) {
.Call(`_sits_dtw2vec`, x, y)
}

dtw <- function() {
.Call(`_sits_dtw`)
}
Expand Down
8 changes: 5 additions & 3 deletions R/sits_som.R
Original file line number Diff line number Diff line change
Expand Up @@ -54,9 +54,11 @@
#' @param som_radius Radius of SOM neighborhood.
#' @param mode Type of learning algorithm (default = "online").
#'
#' @note The sits package implements the \code{"dtw"} (Dynamic Time Warping)
#' similarity measure. All other similarity measurements are from
#' the \code{\link[kohonen:supersom]{kohonen::supersom (dist.fcts)}}
#' @note The \code{sits} package implements the \code{"dtw"} (Dynamic Time
#' Warping) similarity measure using the vector-based DTW algorithm
#' from the \code{IncDTW} R package. All other similarity measurements
#' come from the
#' \code{\link[kohonen:supersom]{kohonen::supersom (dist.fcts)}}
#' function.
#'
#' @return
Expand Down
1 change: 1 addition & 0 deletions man/sits-package.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

8 changes: 5 additions & 3 deletions man/sits_som.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

13 changes: 13 additions & 0 deletions src/RcppExports.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -141,6 +141,18 @@ BEGIN_RCPP
return rcpp_result_gen;
END_RCPP
}
// dtw2vec
double dtw2vec(const arma::vec& x, const arma::vec& y);
RcppExport SEXP _sits_dtw2vec(SEXP xSEXP, SEXP ySEXP) {
BEGIN_RCPP
Rcpp::RObject rcpp_result_gen;
Rcpp::RNGScope rcpp_rngScope_gen;
Rcpp::traits::input_parameter< const arma::vec& >::type x(xSEXP);
Rcpp::traits::input_parameter< const arma::vec& >::type y(ySEXP);
rcpp_result_gen = Rcpp::wrap(dtw2vec(x, y));
return rcpp_result_gen;
END_RCPP
}
// dtw
Rcpp::XPtr<DistanceFunctionPtr> dtw();
RcppExport SEXP _sits_dtw() {
Expand Down Expand Up @@ -662,6 +674,7 @@ static const R_CallMethodDef CallEntries[] = {
{"_sits_C_kernel_max", (DL_FUNC) &_sits_C_kernel_max, 5},
{"_sits_C_kernel_var", (DL_FUNC) &_sits_C_kernel_var, 5},
{"_sits_C_kernel_modal", (DL_FUNC) &_sits_C_kernel_modal, 5},
{"_sits_dtw2vec", (DL_FUNC) &_sits_dtw2vec, 2},
{"_sits_dtw", (DL_FUNC) &_sits_dtw, 0},
{"_sits_C_label_max_prob", (DL_FUNC) &_sits_C_label_max_prob, 1},
{"_sits_linear_interp", (DL_FUNC) &_sits_linear_interp, 1},
Expand Down
197 changes: 100 additions & 97 deletions src/kohonen_dtw.cpp
Original file line number Diff line number Diff line change
@@ -1,155 +1,158 @@
#include <Rcpp.h>

#include <cstdlib>
#include <vector>
#include <cmath>
#include <algorithm>
#include <stdexcept>
#include <RcppArmadillo.h>
// [[Rcpp::depends(RcppArmadillo)]]

#include "./sits_types.h"

using namespace Rcpp;


/**
* Compute the p-norm distance between two 1D C++ vectors.
* Minimum of 2 values.
*
* @description
* The p-norm, also known as the Minkowski norm, is a generalized norm
* calculation that includes several types of distances based on the value of p.
*
* Common values of p include:
* Auxiliary function to calculate the minimum value of `x` and `y`.
*/
double minval(double x, double y)
{
// z > nan for z != nan is required by C the standard
int xnan = std::isnan(x), ynan = std::isnan(y);
if(xnan || ynan) {
if(xnan && !ynan) return y;
if(!xnan && ynan) return x;
return x;
}
return std::min(x,y);
}


/**
* Calculate the `symmetric2` step pattern.
*
* - p = 1 for the Manhattan (city block) distance;
* - p = 2 for the Euclidean norm (distance).
* @description
* This function calculates the `symmetric2` step pattern, which uses a weight
* of 2 for the diagonal step and 1 for the vertical and horizontal to
* compensate for the favor of diagonal steps.
*
* More details about p-norms can be found on Wikipedia:
* https://en.wikipedia.org/wiki/Norm_(mathematics)#p-norm
* @note
* For more information on this step pattern, visit the `IncDTW` package
* documentation: https://www.rdocumentation.org/packages/IncDTW/versions/1.1.4.4/topics/dtw2vec
*
* @param a A 1D vector representing the first point in an m-dimensional space.
* @param b A 1D vector representing the second point in an m-dimensional space.
* @param p The value of the norm to use, determining the type of distance
* calculated.
* @reference
* Leodolter, M., Plant, C., & Brändle, N. (2021). IncDTW: An R Package for
* Incremental Calculation of Dynamic Time Warping. Journal of Statistical
* Software, 99(9), 1–23. https://doi.org/10.18637/jss.v099.i09
*
* @note Both vectors 'a' and 'b' must have the same number of dimensions.
* @note This function was adapted from the DTW implementation found at:
* https://github.com/cjekel/DTW_cpp
* Giorgino, T. (2009). Computing and Visualizing Dynamic Time Warping
* Alignments in R: The dtw Package. Journal of Statistical Software, 31(7),
* 1–24. https://doi.org/10.18637/jss.v031.i07
*
* @return The p-norm distance between vectors 'a' and 'b'.
* @return `symmetric2` step pattern value.
*/
double p_norm(std::vector<double> a, std::vector<double> b, double p)
{
double d = 0;

size_t index;
size_t a_size = a.size();

for (index = 0; index < a_size; index++)
{
d += std::pow(std::abs(a[index] - b[index]), p);
}
return std::pow(d, 1.0 / p);
double calculate_step_pattern_symmetric2(
const double gcm10, // vertical
const double gcm11, // diagonal
const double gcm01, // horizontal
const double cm00
) {
return(cm00 + minval(gcm10, minval(cm00 + gcm11, gcm01)));
}


/**
* Compute the Dynamic Time Warping (DTW) distance between two 2D C++ vectors.
* Vector-based Dynamic Time Warping (DTW) distance.
*
* @description
* This function calculates the Dynamic Time Warping (DTW) distance between
* two sequences that can have a different number of data points but must
* share the same number of dimensions. An exception is thrown if the dimensions
* of the input vectors do not match.
* two sequences using the vector-based algorithm proposed by Leodolter
* et al. (2021).
*
* For more information on DTW, visit:
* https://en.wikipedia.org/wiki/Dynamic_time_warping
* The complexity of this function, as presented by Leodolter et al. (2021), is
* equal to O(n).
*
* @param a A 2D vector representing the first sequence
* @param b A 2D vector representing the second sequence.
* @param p The value of p-norm to use for distance calculation.
* For more information on vector-based DTW, visit:
* https://doi.org/10.18637/jss.v099.i09
*
* @throws std::invalid_argument If the dimensions of 'a' and 'b' do not match.
* @param x A `arma::vec` with time-series values.
* @param y A `arma::vec` with time-series values.
*
* @note
* Both vectors 'a', and 'b' should be structured as follows:
* @reference
* Leodolter, M., Plant, C., & Brändle, N. (2021). IncDTW: An R Package for
* Incremental Calculation of Dynamic Time Warping. Journal of Statistical
* Software, 99(9), 1–23. https://doi.org/10.18637/jss.v099.i09
*
* [number_of_data_points][number_of_dimensions]
*
* allowing the DTW distance computation to adapt to any p-norm value specified.
*
* @note The implementation of this DTW distance calculation was adapted from:
* https://github.com/cjekel/DTW_cpp
* @note
* The implementation of this DTW distance calculation was adapted from the
* `IncDTW` R package.
*
* @return The DTW distance between the two input sequences.
* @return DTW distance.
*/
double distance_dtw_op(std::vector<std::vector<double>> a,
std::vector<std::vector<double>> b,
double p)
// [[Rcpp::export]]
double dtw2vec(const arma::vec &x, const arma::vec &y)
{
int n = a.size();
int o = b.size();
int nx = x.size();
int ny = y.size();

int a_m = a[0].size();
int b_m = b[0].size();
double *p1 = new double[nx];
double *p2 = new double[nx];

if (a_m != b_m)
{
throw std::invalid_argument(
"a and b must have the same number of dimensions!"
);
}
std::vector<std::vector<double>> d(n, std::vector<double>(o, 0.0));

d[0][0] = p_norm(a[0], b[0], p);
double *ptmp;
double ret;

for (int i = 1; i < n; i++)
{
d[i][0] = d[i - 1][0] + p_norm(a[i], b[0], p);
}
for (int i = 1; i < o; i++)
// first column
*p1 = std::abs(x[0] - y[0]);
for (int i = 1; i < nx; i++)
{
d[0][i] = d[0][i - 1] + p_norm(a[0], b[i], p);
p1[i] = std::abs(x[i] - y[0]) + p1[i - 1];
}
for (int i = 1; i < n; i++)

for (int j = 1; j < ny; j++)
{
for (int j = 1; j < o; j++)
*p2 = std::abs(x[0] - y[j]) + *(p1);

for (int i = 1; i < nx; i++)
{
d[i][j] = p_norm(a[i], b[j], p) + std::fmin(
std::fmin(d[i - 1][j], d[i][j - 1]), d[i - 1][j - 1]
);
*(p2 + i) = calculate_step_pattern_symmetric2(*(p2 + i - 1), *(p1 + i - 1), *(p1 + i), std::abs(x[i] - y[j]));
}
ptmp = p1;
p1 = p2;
p2 = ptmp;
}
return d[n - 1][o - 1];

ret = *(p1 + nx - 1); // p1[nx-1]

delete[] p1;
delete[] p2;

return (ret);
}


/**
* Dynamic Time Warping (DTW) distance wrapper.
*
* @description
* This function calculates prepare data from `Kohonen` package and calculate
* the DTW distance between two array of points.
* the DTW distance between two 1D time-series.
*
* @param a A 2D vector representing the first sequence.
* @param b A 2D vector representing the second sequence.
* @param np Number of points in vectors `a` and `b`.
* @param nNA Number of NA values in the vectors `a` and `b`.
* @param p1 A 1D array representing the first time-series.
* @param p2 A 1D array representing the second time-series.
* @param np Number of points in arrays `p1` and `p2`.
* @param nNA Number of NA values in the arrays `p1` and `p2`.
*
* @note The function signature was created following the `Kohonen` R package
* specifications for custom distance functions.
*
*
* @return The DTW distance between the two input sequences.
* @return The DTW distance between two time-series.
*/
double kohonen_dtw(double *p1, double *p2, int np, int nNA)
{
std::vector<double> p1_data(p1, p1 + np);
std::vector<double> p2_data(p2, p2 + np);
arma::vec p1_vec(p1, np, false);
arma::vec p2_vec(p2, np, false);

std::vector<std::vector<double>> p1_vec = {p1_data};
std::vector<std::vector<double>> p2_vec = {p2_data};

// p-norm fixed in 2 (equivalent to euclidean distance)
return (distance_dtw_op(p1_vec, p2_vec, 2));
return dtw2vec(p1_vec, p2_vec);
}


// [[Rcpp::export]]
Rcpp::XPtr<DistanceFunctionPtr> dtw()
{
Expand Down
18 changes: 17 additions & 1 deletion tests/testthat/test-som.R
Original file line number Diff line number Diff line change
@@ -1,3 +1,19 @@
test_that("Calculating time-series distances using DTW", {
# test 1 (example from `IncDTW` article)
t1 <- c(3, 4, 5, 6)
t2 <- c(1, 3, 3, 5, 6)

expect_equal(dtw2vec(t1, t2), 3)

# test 2 (using ndvi time-series)
t1 <- c(0.5001, 0.5060, 0.5079, 0.7141, 0.4262)
t2 <- c(0.3974, 0.5144, 0.5052, 0.6463, 0.5139)

# 0.4323 -> Value confirmed using `dtw` and `IncDTW` packages as reference
expect_equal(dtw2vec(t1, t2), 0.4323)
})


test_that("Creating clustering using Self-organizing Maps with DTW distance", {
set.seed(2903)

Expand All @@ -9,7 +25,7 @@ test_that("Creating clustering using Self-organizing Maps with DTW distance", {
)

expect_true(all(colnames(som_map$labelled_neurons) %in%
c("id_neuron", "label_samples", "count", "prior_prob", "post_prob")))
c("id_neuron", "label_samples", "count", "prior_prob", "post_prob")))

expect_true(som_map$labelled_neurons[1, ]$prior_prob >= 0)
expect_true(som_map$labelled_neurons[1, ]$post_prob >= 0)
Expand Down

0 comments on commit 77f391e

Please sign in to comment.