Skip to content

Commit

Permalink
Ready to benchmark on large matrices
Browse files Browse the repository at this point in the history
  • Loading branch information
TeachRaccooon committed Feb 28, 2024
1 parent d8a1fd2 commit 01b1f8a
Show file tree
Hide file tree
Showing 2 changed files with 13 additions and 56 deletions.
27 changes: 3 additions & 24 deletions RandLAPACK/drivers/rl_rbki.hh
Original file line number Diff line number Diff line change
Expand Up @@ -192,9 +192,6 @@ int RBKI<T, RNG>::call(
sketching_t_dur = duration_cast<microseconds>(sketching_t_stop - sketching_t_start).count();
gemm_A_t_start = high_resolution_clock::now();
}
printf("m %d, n %d, k %d\n", m, n, k);
char name[] = "A";
//RandBLAS::util::print_colmaj(n, k, Y_i, name);

// [X_ev, ~] = qr(A * Y_i, 0)
blas::gemm(Layout::ColMajor, Op::NoTrans, Op::NoTrans, m, k, n, 1.0, A, m, Y_i, n, 0.0, X_i, m);
Expand Down Expand Up @@ -235,7 +232,6 @@ int RBKI<T, RNG>::call(
main_loop_t_start = high_resolution_clock::now();

if (iter % 2 != 0) {
printf("First\n");
if(this -> timing)
gemm_A_t_start = high_resolution_clock::now();

Expand Down Expand Up @@ -307,7 +303,7 @@ int RBKI<T, RNG>::call(
// Early termination
// if (abs(R(end)) <= sqrt(eps('double')))
if(std::abs(R_ii[(n + 1) * (k - 1)]) < std::sqrt(std::numeric_limits<double>::epsilon())) {
printf("TERMINATION 1 at iteration %ld\n", iter);
//printf("TERMINATION 1 at iteration %ld\n", iter);
break;
}

Expand All @@ -319,7 +315,6 @@ int RBKI<T, RNG>::call(
++iter_ev;
}
else {
printf("Second\n");
if(this -> timing)
gemm_A_t_start = high_resolution_clock::now();

Expand Down Expand Up @@ -386,7 +381,7 @@ int RBKI<T, RNG>::call(
// Early termination
// if (abs(S(end)) <= sqrt(eps('double')))
if(std::abs(S_ii[((n + k) + 1) * (k - 1)]) < std::sqrt(std::numeric_limits<double>::epsilon())) {
printf("TERMINATION 2 at iteration %ld\n", iter);
//printf("TERMINATION 2 at iteration %ld\n", iter);
break;
}

Expand Down Expand Up @@ -418,19 +413,15 @@ int RBKI<T, RNG>::call(
++iter;
//norm(R, 'fro') > sqrt(1 - sq_tol) * norm_A
if(norm_R > threshold) {
printf("Threshold termination\n");
//printf("Threshold termination\n");
break;
}
printf("Iter_ev + iter_od %d\n", iter_ev + iter_od);
}
printf("Total iters %d\n", iter);

this -> norm_R_end = norm_R;
this->num_krylov_iters = iter;
iter % 2 == 0 ? end_rows = k * (iter_ev + 1), end_cols = k * iter_ev : end_rows = k * (iter_od + 1), end_cols = k * iter_od;

printf("End rows & cols %d, %d\n", end_rows, end_cols);

if(this -> timing) {
allocation_t_start = high_resolution_clock::now();
}
Expand All @@ -445,23 +436,11 @@ int RBKI<T, RNG>::call(
}

if (iter % 2 != 0) {
printf("First option\n");
// [U_hat, Sigma, V_hat] = svd(R')
lapack::gesdd(Job::SomeVec, end_rows, end_cols, R, n, Sigma, U_hat, end_rows, VT_hat, end_cols);
} else {
printf("Second option\n");
// [U_hat, Sigma, V_hat] = svd(S)
lapack::gesdd(Job::SomeVec, end_rows, end_cols, S, n + k, Sigma, U_hat, end_rows, VT_hat, end_cols);
/*
char name1[] = "U_hat";
RandBLAS::util::print_colmaj(end_rows, end_cols, U_hat, name1);
char name3[] = "Sigma";
RandBLAS::util::print_colmaj(n, 1, Sigma, name3);
char name2[] = "V_hat";
RandBLAS::util::print_colmaj(end_cols, end_cols, VT_hat, name2);
*/
}
// U = X_ev * U_hat
blas::gemm(Layout::ColMajor, Op::NoTrans, Op::NoTrans, m, end_cols, end_rows, 1.0, X_ev, m, U_hat, end_rows, 0.0, U, m);
Expand Down
42 changes: 10 additions & 32 deletions benchmark/bench_RBKI/RBKI_speed_comparisons.cc
Original file line number Diff line number Diff line change
Expand Up @@ -71,30 +71,18 @@ static void update_best_time(int iter, long &t_best, long &t_curr, T* S1, T* S2,
// Scale columns of U by S
for (int i = 0; i < target_rank; ++i)
blas::scal(n, all_data.Sigma[i], &U_cpy_dat[m * i], 1);

// Compute AV(:, 1:custom_rank) - SU(1:custom_rank)
blas::gemm(Layout::ColMajor, Op::NoTrans, Op::Trans, m, custom_rank, n, 1.0, all_data.A.data(), m, all_data.VT.data(), n, -1.0, U_cpy_dat, m);

// A'U - VS
// Scale columns of V by S
// Since we have VT, we will be scaling its rows

//char name[] = "V_cpy_pre";
//RandBLAS::util::print_colmaj(n, n, VT_cpy_dat, name);

for (int i = 0; i < n; ++i)
blas::scal(custom_rank, all_data.Sigma[i], &VT_cpy_dat[i], n);

//char name1[] = "V_cpy_post";
//RandBLAS::util::print_colmaj(n, n, VT_cpy_dat, name1);

// Compute A'U(:, 1:custom_rank) - VS(1:custom_rank).
// We will actually have to perform U' * A - Sigma * VT.
blas::gemm(Layout::ColMajor, Op::Trans, Op::NoTrans, target_rank, custom_rank, m, 1.0, all_data.U.data(), m, all_data.A.data(), m, -1.0, VT_cpy_dat, n);

//char name3[] = "A'U";
//RandBLAS::util::print_colmaj(n, n, VT_cpy_dat, name3);

T nrm1 = lapack::lange(Norm::Fro, m, custom_rank, U_cpy_dat, m) / std::sqrt(custom_rank);
T nrm2 = lapack::lange(Norm::Fro, target_rank, custom_rank, VT_cpy_dat, n) / std::sqrt(custom_rank);

Expand Down Expand Up @@ -145,27 +133,17 @@ static void call_all_algs(
RBKI.call(m, n, all_data.A.data(), m, b_sz, all_data.U.data(), all_data.VT.data(), all_data.Sigma.data(), state);
auto stop_rbki = high_resolution_clock::now();
dur_rbki = duration_cast<microseconds>(stop_rbki - start_rbki).count();

char name[] = "A";
//RandBLAS::util::print_colmaj(m, n, all_data.A.data(), name);

char name1[] = "U";
//RandBLAS::util::print_colmaj(m, target_rank, all_data.U.data(), name1);

char name3[] = "Sigma";
//RandBLAS::util::print_colmaj(target_rank, 1, all_data.Sigma.data(), name3);

char name2[] = "VT";
//RandBLAS::util::print_colmaj(n, n, all_data.VT.data(), name2);


T residual_err = residual_error_comp<T>(all_data, target_rank, custom_rank);
T residual_err_custom = residual_error_comp<T>(all_data, target_rank, custom_rank);
T residual_err_target = residual_error_comp<T>(all_data, target_rank, target_rank);

// Print accuracy info
printf("sqrt(||AV - SU||^2_F + ||A'U - VS||^2_F) / sqrt(traget_rank): %.16e\n", residual_err);
printf("sqrt(||AV - SU||^2_F + ||A'U - VS||^2_F) / sqrt(custom_rank): %.16e\n", residual_err_custom);
printf("sqrt(||AV - SU||^2_F + ||A'U - VS||^2_F) / sqrt(traget_rank): %.16e\n", residual_err_target);

std::ofstream file(output_filename, std::ios::app);
file << b_sz << ", " << RBKI.max_krylov_iters << ", " << target_rank << ", " << custom_rank << ", " << residual_err << ", " << dur_rbki << ", " << dur_svd << ",\n";
file << b_sz << ", " << RBKI.max_krylov_iters << ", " << target_rank << ", " << custom_rank << ", " << residual_err_target << ", " << residual_err_custom << ", " << dur_rbki << ", " << dur_svd << ",\n";

state_gen = state;
data_regen<T, RNG>(m_info, all_data, state_gen, 0);
Expand All @@ -187,12 +165,12 @@ int main(int argc, char *argv[]) {
int64_t b_sz_stop = 0;
int64_t target_rank_start = 512;
int64_t target_rank_curr = target_rank_start;
int64_t target_rank_stop = 512;
int64_t custom_rank = 32;
int64_t target_rank_stop = 4096;
int64_t custom_rank = 10;
double tol = std::pow(std::numeric_limits<double>::epsilon(), 0.85);
auto state = RandBLAS::RNGState();
auto state_constant = state;
int numruns = 1;
int numruns = 3;
long dur_svd = 0;
std::vector<long> res;

Expand All @@ -206,8 +184,8 @@ int main(int argc, char *argv[]) {
// Update basic params.
m = m_info.rows;
n = m_info.cols;
b_sz_start = 16;//std::max((int64_t) 1, n / 10);
b_sz_stop = 16;//std::max((int64_t) 1, n / 10);
b_sz_start = 8;//std::max((int64_t) 1, n / 10);
b_sz_stop = 128;//std::max((int64_t) 1, n / 10);

// Allocate basic workspace.
RBKI_benchmark_data<double> all_data(m, n, tol);
Expand Down

0 comments on commit 01b1f8a

Please sign in to comment.