Skip to content

Commit

Permalink
Benchmarking i/o update
Browse files Browse the repository at this point in the history
  • Loading branch information
TeachRaccooon committed Feb 20, 2025
1 parent 5904f73 commit 1ad160d
Show file tree
Hide file tree
Showing 6 changed files with 119 additions and 72 deletions.
15 changes: 7 additions & 8 deletions benchmark/bench_BQRRP/BQRRP_pivot_quality.cc
Original file line number Diff line number Diff line change
Expand Up @@ -191,18 +191,17 @@ static void sv_ratio(
}

int main(int argc, char *argv[]) {

if(argc <= 1) {
printf("No input provided\n");
return 0;
if (argc != 3) {
// Expected input into this benchmark.
std::cerr << "Usage: " << argv[0] << " <num_rows> <num_cols> <block_size>..." << std::endl;
return 1;
}
auto size = argv[1];

// Declare parameters
int64_t m = std::stol(size);
int64_t n = std::stol(size);
int64_t m = std::stol(argv[1]);
int64_t n = std::stol(argv[2]);
double d_factor = 1.0;
int64_t b_sz = 4096;
int64_t b_sz = std::stol(argv[3]);;
auto state = RandBLAS::RNGState<r123::Philox4x32>();
auto state_constant1 = state;
auto state_constant2 = state;
Expand Down
29 changes: 19 additions & 10 deletions benchmark/bench_BQRRP/BQRRP_runtime_breakdown.cc
Original file line number Diff line number Diff line change
Expand Up @@ -110,25 +110,34 @@ static void call_all_algs(

int main(int argc, char *argv[]) {

if(argc <= 1) {
printf("No input provided\n");
return 0;
if (argc < 5) {
// Expected input into this benchmark.
std::cerr << "Usage: " << argv[0] << " <qr_fname> <num_runs> <num_rows> <num_cols> <block_sizes>..." << std::endl;
return 1;
}
auto size = argv[1];

// Declare parameters
int64_t m = std::stol(size);
int64_t n = std::stol(size);
int64_t m = std::stol(argv[3]);
int64_t n = std::stol(argv[4]);
double d_factor = 1.0;
std::vector<int64_t> b_sz = {250, 500, 1000, 2000, 4000, 8000};
// Fill the block size vector
std::vector<int64_t> b_sz;
for (int i = 0; i < argc-5; ++i)
b_sz.push_back(std::stoi(argv[i + 5]));
// Save elements in string for logging purposes
std::ostringstream oss;
for (const auto &val : b_sz)
oss << val << ", ";
std::string b_sz_string = oss.str();

//std::vector<int64_t> b_sz = {256, 512, 1024, 2048, 4096, 8192};
auto state = RandBLAS::RNGState<r123::Philox4x32>();
auto state_constant = state;
// Timing results
std::vector<long> res;
// Number of algorithm runs. We only record best times.
int64_t numruns = 3;
std::string qr_tall = argv[2];
int64_t numruns = std::stol(argv[2]);
std::string qr_tall = argv[1];

// Allocate basic workspace
QR_speed_benchmark_data<double> all_data(m, n, d_factor);
Expand All @@ -150,7 +159,7 @@ int main(int argc, char *argv[]) {
"\nNum OMP threads:" + std::to_string(RandLAPACK::util::get_omp_threads()) +
"\nInput type:" + std::to_string(m_info.m_type) +
"\nInput size:" + std::to_string(m) + " by " + std::to_string(n) +
"\nAdditional parameters: Tall QR subroutine " + argv[2] + " BQRRP block size start: " + std::to_string(b_sz.front()) + " BQRRP block size end: " + std::to_string(b_sz.back()) + " num runs per size " + std::to_string(numruns) + " BQRRP d factor: " + std::to_string(d_factor) +
"\nAdditional parameters: Tall QR subroutine " + argv[2] + " BQRRP block sizes: " + b_sz_string + "num runs per size " + std::to_string(numruns) + " BQRRP d factor: " + std::to_string(d_factor) +
"\n";
file.flush();

Expand Down
28 changes: 17 additions & 11 deletions benchmark/bench_BQRRP/BQRRP_speed_comparisons_block_size.cc
Original file line number Diff line number Diff line change
Expand Up @@ -187,25 +187,31 @@ static void call_all_algs(

int main(int argc, char *argv[]) {

if(argc <= 1) {
printf("No input provided\n");
return 0;
if (argc < 4) {
// Expected input into this benchmark.
std::cerr << "Usage: " << argv[0] << " <num_runs> <num_rows> <num_cols> <block_sizes>..." << std::endl;
return 1;
}

auto size = argv[1];

// Declare parameters
int64_t m = std::stol(size);
int64_t n = std::stol(size);
int64_t m = std::stol(argv[2]);
int64_t n = std::stol(argv[3]);
double d_factor = 1.0;
std::vector<int64_t> b_sz = {250, 500, 1000, 2000, 4000, 8000};
//std::vector<int64_t> b_sz = {256, 512, 1024, 2048, 4096, 8192};
std::vector<int64_t> b_sz;
for (int i = 0; i < argc-4; ++i)
b_sz.push_back(std::stoi(argv[i + 4]));
// Save elements in string for logging purposes
std::ostringstream oss;
for (const auto &val : b_sz)
oss << val << ", ";
std::string b_sz_string = oss.str();

auto state = RandBLAS::RNGState<r123::Philox4x32>();
auto state_constant = state;
// Timing results
std::vector<long> res;
// Number of algorithm runs. We only record best times.
int64_t numruns = 3;
int64_t numruns = std::stol(argv[1]);

// Allocate basic workspace
QR_speed_benchmark_data<double> all_data(m, n, d_factor);
Expand All @@ -227,7 +233,7 @@ int main(int argc, char *argv[]) {
"\nNum OMP threads:" + std::to_string(RandLAPACK::util::get_omp_threads()) +
"\nInput type:" + std::to_string(m_info.m_type) +
"\nInput size:" + std::to_string(m) + " by " + std::to_string(n) +
"\nAdditional parameters: BQRRP block size start: " + std::to_string(b_sz.front()) + " BQRRP block size end: " + std::to_string(b_sz.back()) + " num runs per size " + std::to_string(numruns) + " BQRRP d factor: " + std::to_string(d_factor) +
"\nAdditional parameters: BQRRP block sizes: " + b_sz_string + "num runs per size " + std::to_string(numruns) + " BQRRP d factor: " + std::to_string(d_factor) +
"\n";
file.flush();

Expand Down
37 changes: 23 additions & 14 deletions benchmark/bench_BQRRP/BQRRP_speed_comparisons_mat_size.cc
Original file line number Diff line number Diff line change
Expand Up @@ -190,29 +190,37 @@ static void call_all_algs(

int main(int argc, char *argv[]) {

if(argc <= 1) {
printf("No input provided\n");
return 0;
if (argc < 3) {
// Expected input into this benchmark.
std::cerr << "Usage: " << argv[0] << " <num_runs> <block_size> <square_matrix_dim (multiple)>..." << std::endl;
return 1;
}

auto block_size = argv[1];

// Declare parameters
int64_t m_start = std::pow(2, 10);
int64_t m_end = std::pow(2, 16);
// Fill the matrix size vector
std::vector<int64_t> m_sz;
for (int i = 0; i < argc-3; ++i)
m_sz.push_back(std::stoi(argv[i + 3]));
// Save elements in string for logging purposes
std::ostringstream oss;
for (const auto &val : m_sz)
oss << val << ", ";
std::string m_sz_string = oss.str();

double d_factor = 1.0;
int64_t b_sz = std::stol(block_size);
int64_t b_sz = std::stol(argv[2]);
auto state = RandBLAS::RNGState<r123::Philox4x32>();
auto state_constant = state;
// Timing results
std::vector<long> res;
// Number of algorithm runs. We only record best times.
int64_t numruns = 3;
int64_t numruns = std::stol(argv[1]);

// Allocate basic workspace
QR_speed_benchmark_data<double> all_data(m_end, m_end, b_sz, d_factor);
int64_t m_max = *std::max_element(m_sz.begin(), m_sz.end());
QR_speed_benchmark_data<double> all_data(m_max, m_max, b_sz, d_factor);
// Generate the input matrix - gaussian suffices for performance tests.
RandLAPACK::gen::mat_gen_info<double> m_info(m_end, m_end, RandLAPACK::gen::gaussian);
RandLAPACK::gen::mat_gen_info<double> m_info(m_max, m_max, RandLAPACK::gen::gaussian);
RandLAPACK::gen::mat_gen(m_info, all_data.A.data(), state);

// Declare a data file
Expand All @@ -228,13 +236,14 @@ int main(int argc, char *argv[]) {
" rows correspond to BQRRP runs with mat sizes varying with powers of 2, with numruns repititions of each mat size."
"\nNum OMP threads:" + std::to_string(RandLAPACK::util::get_omp_threads()) +
"\nInput type:" + std::to_string(m_info.m_type) +
"\nInput size:" + " dim start: " + std::to_string(m_start) + " dim stop: " + std::to_string(m_end) +
"\nInput size:" + " dim start: " + m_sz_string +
"\nAdditional parameters: BQRRP block size: " + std::to_string(b_sz) + " num runs per size " + std::to_string(numruns) + " BQRRP d factor: " + std::to_string(d_factor) +
"\n";
file.flush();

for (;m_start <= m_end; m_start *= 2) {
call_all_algs(m_info, numruns, m_start, all_data, state_constant, output_filename);
size_t i = 0;
for (;i < m_sz.size(); ++i) {
call_all_algs(m_info, numruns, m_sz[i], all_data, state_constant, output_filename);
}
}
#endif
48 changes: 32 additions & 16 deletions benchmark/bench_BQRRP/BQRRP_subroutines_speed.cc
Original file line number Diff line number Diff line change
Expand Up @@ -352,27 +352,43 @@ static void call_apply_q(

int main(int argc, char *argv[]) {

auto size = argv[1];
if (argc < 3) {
// Expected input into this benchmark.
std::cerr << "Usage: " << argv[0] << " <num_runs> <num_rows> <num_cols(multiple, increasing order)> ..." << std::endl;
return 1;
}

int64_t i = 0;
size_t i = 0;
// Declare parameters
int64_t m = std::stol(size);
int64_t n_start = 256;
int64_t n_stop = 2048;
int64_t nb_start = 256;
int64_t m = std::stol(argv[2]);
int64_t n_start = 256;
int64_t n_stop = 2048;
// Fill the n size vector
std::vector<int64_t> n_sz;
for (int i = 0; i < argc-3; ++i)
n_sz.push_back(std::stoi(argv[i + 3]));
// Save elements in string for logging purposes
std::ostringstream oss;
for (const auto &val : n_sz)
oss << val << ", ";
std::string n_sz_string = oss.str();

// Internal block size for ORMQR, will increase by 2 at every iteration
int64_t nb_start = n_start;
auto state = RandBLAS::RNGState();
auto state_B = RandBLAS::RNGState();
auto state_constant = state;
auto state_constant_B = state;
// Timing results
std::vector<long> res;
// Number of algorithm runs. We only record best times.
int64_t numruns = 3;
int64_t numruns = std::stol(argv[1]);

// Allocate basic workspace
benchmark_data<double> all_data(m, n_stop);
int64_t n_max = *std::max_element(n_sz.begin(), n_sz.end());
benchmark_data<double> all_data(m, n_max);
// Generate the input matrix - gaussian suffices for performance tests.
RandLAPACK::gen::mat_gen_info<double> m_info(m, n_stop, RandLAPACK::gen::gaussian);
RandLAPACK::gen::mat_gen_info<double> m_info(m, n_max, RandLAPACK::gen::gaussian);
RandLAPACK::gen::mat_gen(m_info, all_data.A.data(), state);
RandLAPACK::gen::mat_gen(m_info, all_data.B.data(), state_B);

Expand All @@ -391,18 +407,18 @@ int main(int argc, char *argv[]) {
" \n In all cases, rows vary from n_start to n_stop in powers of two (with numruns runs per size)."
"\nNum OMP threads:" + std::to_string(RandLAPACK::util::get_omp_threads()) +
"\nInput type:" + std::to_string(m_info.m_type) +
"\nInput size:" + std::to_string(m) + " by " + std::to_string(n_start) + " to " + std::to_string(n_stop) +
"\nInput size:" + std::to_string(m) + " by " + n_sz_string +
"\nAdditional parameters num runs per size " + std::to_string(numruns) + " nb_start " + std::to_string(nb_start) +
"\n";
file.flush();

for (i = n_start; i <= n_stop; i *= 2)
call_wide_qrcp(m_info, numruns, i, all_data, state, output_filename);
for (;i < n_sz.size(); ++i)
call_wide_qrcp(m_info, numruns, n_sz[i], all_data, state, output_filename);

for (i = n_start; i <= n_stop; i *= 2)
call_tsqr(m_info, numruns, i, nb_start, all_data, state, output_filename);
for (;i < n_sz.size(); ++i)
call_tsqr(m_info, numruns, n_sz[i], nb_start, all_data, state, output_filename);

for (i = n_start; i <= n_stop; i *= 2)
call_apply_q(m_info, numruns, i, nb_start, all_data, state, state_B, output_filename);
for (;i < n_sz.size(); ++i)
call_apply_q(m_info, numruns, n_sz[i], nb_start, all_data, state, state_B, output_filename);
}
#endif
34 changes: 21 additions & 13 deletions benchmark/bench_BQRRP/HQRRP_runtime_breakdown.cc
Original file line number Diff line number Diff line change
Expand Up @@ -96,24 +96,32 @@ static void call_all_algs(

int main(int argc, char *argv[]) {

if(argc <= 1) {
printf("No input provided\n");
return 0;
if (argc < 4) {
// Expected input into this benchmark.
std::cerr << "Usage: " << argv[0] << " <num_runs> <num_rows> <num_cols> <block_sizes>..." << std::endl;
return 1;
}
auto size = argv[1];

// Declare parameters
int64_t m = std::stol(size);
int64_t n = std::stol(size);
int64_t m = std::stol(argv[2]);
int64_t n = std::stol(argv[3]);
double d_factor = 1.0;
int64_t b_sz_start = 32;
int64_t b_sz_end = 8192;
// Fill the block size vector
std::vector<int64_t> b_sz;
for (int i = 0; i < argc-4; ++i)
b_sz.push_back(std::stoi(argv[i + 4]));
// Save elements in string for logging purposes
std::ostringstream oss;
for (const auto &val : b_sz)
oss << val << ", ";
std::string b_sz_string = oss.str();

auto state = RandBLAS::RNGState();
auto state_constant = state;
// Timing results
std::vector<long> res;
// Number of algorithm runs.
int64_t numruns = 1;
int64_t numruns = std::stol(argv[1]);;

// Allocate basic workspace
QR_speed_benchmark_data<double> all_data(m, n, d_factor);
Expand All @@ -135,13 +143,13 @@ int main(int argc, char *argv[]) {
"\nNum OMP threads:" + std::to_string(RandLAPACK::util::get_omp_threads()) +
"\nInput type:" + std::to_string(m_info.m_type) +
"\nInput size:" + std::to_string(m) + " by " + std::to_string(n) +
"\nAdditional parameters: HQRRP block size start: " + std::to_string(b_sz_start) + " HQRRP block size end: " + std::to_string(b_sz_end) + " num runs per size " + std::to_string(numruns) + " HQRRP d factor: " + std::to_string(d_factor) +
"\nAdditional parameters: HQRRP block sizes: " + b_sz_string + "num runs per size " + std::to_string(numruns) + " HQRRP d factor: " + std::to_string(d_factor) +
"\n";
file.flush();


for (;b_sz_start <= b_sz_end; b_sz_start *= 2) {
call_all_algs(m_info, numruns, b_sz_start, all_data, state_constant, output_filename);
size_t i = 0;
for (;i < b_sz.size(); ++i) {
call_all_algs(m_info, numruns, b_sz[i], all_data, state_constant, output_filename);
}
}
#endif

0 comments on commit 1ad160d

Please sign in to comment.