revert back to old cosine #7

Closed · wants to merge 2 commits
gbrl/src/cpp/fitter.cpp: 4 additions & 2 deletions

@@ -46,7 +46,7 @@ void Fitter::step_cpu(dataSet *dataset, ensembleData *edata, ensembleMetaData *m
 }

 float *norm_grads = nullptr;
-if (metadata->n_cat_features > 0){
+if (metadata->split_score_func == Cosine || metadata->n_cat_features > 0){
 norm_grads = init_zero_mat(dataset->n_samples*metadata->output_dim);
 calculate_squared_norm(norm_grads, dataset->grads, dataset->n_samples, metadata->output_dim, par_th);
 }
@@ -74,6 +74,7 @@ void Fitter::step_cpu(dataSet *dataset, ensembleData *edata, ensembleMetaData *m
 generator.processCategoricalCandidates(dataset->categorical_obs, norm_grads);

 dataset->build_grads = build_grads;
+dataset->norm_grads = norm_grads;
 int added_leaves = 0;
 if (metadata->grow_policy == GREEDY)
 added_leaves = Fitter::fit_greedy_tree(dataset, edata, metadata, generator);
@@ -187,6 +188,7 @@ float Fitter::fit_cpu(dataSet *dataset, const float* targets, ensembleData *edat
 calculate_squared_norm(norm_grads, grads, batch_dataset.n_samples, metadata->output_dim, metadata->par_th);
 }
 batch_dataset.build_grads = build_grads;
+batch_dataset.norm_grads = norm_grads;

 int added_leaves = 0;
 if (metadata->grow_policy == GREEDY)
@@ -275,7 +277,7 @@ int Fitter::fit_greedy_tree(dataSet *dataset, ensembleData *edata, ensembleMetaD
 best_score = -INFINITY;
 if (to_split){
 if (metadata->split_score_func == Cosine){
-parent_score = scoreCosine(crnt_node->sample_indices, crnt_node->n_samples, dataset->build_grads, metadata->output_dim);
+parent_score = scoreCosine(crnt_node->sample_indices, crnt_node->n_samples, dataset->build_grads, dataset->norm_grads, metadata->output_dim);
 } else if (metadata->split_score_func == L2){
 parent_score = scoreL2(crnt_node->sample_indices, crnt_node->n_samples, dataset->build_grads, metadata->output_dim);
 } else{
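Taken together, these hunks make the CPU fitter precompute squared gradient norms whenever the Cosine split score is active, not only when categorical features are present, and hand the buffer to the tree builder through the dataset. A minimal sketch of the presumed semantics, assuming calculate_squared_norm writes the elementwise squares of the gradient matrix (suggested by the n_samples * output_dim allocation above; the real implementation and its par_th threading are not shown in this diff):

// Sketch only; names and semantics inferred from the call sites above.
// calculate_squared_norm presumably fills norm_grads with the gradients
// squared entry by entry, so a node can later reduce it to sum_i ||g_i||^2.
void calculate_squared_norm_sketch(float *norm_grads, const float *grads,
                                   int n_samples, int output_dim) {
    for (int i = 0; i < n_samples * output_dim; ++i)
        norm_grads[i] = grads[i] * grads[i];
}

// Reducing over a node's sample rows yields the scalar squared_norms
// that the reverted cosine_dist (see math_ops.h below) consumes.
float node_squared_norm_sketch(const float *norm_grads, const int *indices,
                               int n_node_samples, int output_dim) {
    float sum = 0.0f;
    for (int i = 0; i < n_node_samples; ++i)
        for (int d = 0; d < output_dim; ++d)
            sum += norm_grads[indices[i] * output_dim + d];
    return sum;
}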
gbrl/src/cpp/gbrl.cpp: 8 additions & 7 deletions

@@ -232,7 +232,7 @@ float* GBRL::predict(const float *obs, const char *categorical_obs, const int n_
 }
 #endif

-dataSet dataset{obs, categorical_obs, nullptr, nullptr, nullptr, n_samples, device};
+dataSet dataset{obs, categorical_obs, nullptr, nullptr, nullptr, nullptr, n_samples, device};
 float *preds = nullptr;
 // int n_trees = this->get_num_trees();
 #ifdef USE_CUDA
@@ -457,7 +457,7 @@ void GBRL::_step_gpu(dataSet *dataset){
 preprocess_matrices(gpu_build_grads, gpu_grads_norm, n_samples, output_dim, this->metadata->split_score_func);

 int n_candidates = process_candidates_cuda(gpu_obs, dataset->categorical_obs, gpu_grads_norm, candidate_indices, candidate_values, candidate_categories, candidate_numerical, n_samples, n_num_features, n_cat_features, n_bins, this->metadata->generator_type);
-dataSet cuda_dataset{trans_obs, gpu_categorical_obs, gpu_grads, gpu_feature_weights, gpu_build_grads, n_samples, this->device};
+dataSet cuda_dataset{trans_obs, gpu_categorical_obs, gpu_grads, gpu_feature_weights, gpu_build_grads, gpu_grads_norm, n_samples, this->device};
 candidatesData candidata{n_candidates, candidate_indices, candidate_values, candidate_numerical, candidate_categories};
 splitDataGPU *split_data = allocate_split_data(this->metadata, candidata.n_candidates);
 if (this->metadata->grow_policy == GREEDY)
@@ -550,7 +550,7 @@ float GBRL::_fit_gpu(dataSet *dataset, float *targets, const int n_iterations){
 cudaMemcpy(gpu_categorical_obs, dataset->categorical_obs, sizeof(char)*n_cat_features*n_samples*MAX_CHAR_SIZE, cudaMemcpyHostToDevice);
 transpose_matrix(gpu_obs, trans_obs, n_num_features, n_samples);

-dataSet cuda_dataset{gpu_obs, gpu_categorical_obs, gpu_grads, gpu_feature_weights, gpu_build_grads, n_samples, this->device};
+dataSet cuda_dataset{gpu_obs, gpu_categorical_obs, gpu_grads, gpu_feature_weights, gpu_build_grads, gpu_grads_norm, n_samples, this->device};
 predict_cuda_no_host(&cuda_dataset, gpu_preds, this->metadata, this->edata, this->cuda_opt, this->n_cuda_opts, 0, 0, true);

 MultiRMSEGrad(gpu_preds, gpu_targets, gpu_grads, output_dim, n_samples, n_blocks, threads_per_block);
@@ -566,6 +566,7 @@ float GBRL::_fit_gpu(dataSet *dataset, float *targets, const int n_iterations){
 cuda_dataset.grads = gpu_grads;
 cuda_dataset.obs = trans_obs;
 cuda_dataset.build_grads = gpu_build_grads;
+cuda_dataset.norm_grads = gpu_grads_norm;
 if (this->metadata->grow_policy == GREEDY)
 fit_tree_greedy_cuda(&cuda_dataset, this->edata, this->metadata, &candidata, split_data);
 else
@@ -618,7 +619,7 @@ void GBRL::step(const float *obs, const char *categorical_obs, float *grads, con
 return;
 }
 #endif
-dataSet dataset{obs, categorical_obs, grads, feature_weights, nullptr, n_samples, device};
+dataSet dataset{obs, categorical_obs, grads, feature_weights, nullptr, nullptr, n_samples, device};
 #ifdef USE_CUDA
 if (this->device == gpu)
 this->_step_gpu(&dataset);
@@ -697,7 +698,7 @@ float GBRL::fit(float *obs, char *categorical_obs, float *targets, const float *

 float *bias = calculate_mean(training_targets, n_samples, output_dim, metadata->par_th);
 this->set_bias(bias, this->metadata->output_dim);
-dataSet dataset{training_obs, training_cat_obs, nullptr, feature_weights, nullptr, n_samples, this->device};
+dataSet dataset{training_obs, training_cat_obs, nullptr, feature_weights, nullptr, nullptr, n_samples, this->device};

 float full_loss = -INFINITY;
 #ifdef USE_CUDA
@@ -880,7 +881,7 @@ ensembleData *edata_cpu = nullptr;
 shap_data->base_poly = base_poly;
 shap_data->norm_values = norm;
 float *shap_values = init_zero_mat((this->metadata->n_num_features + this->metadata->n_cat_features)*this->metadata->output_dim * n_samples);
-dataSet dataset{obs, categorical_obs, nullptr, nullptr, nullptr, n_samples, this->device};
+dataSet dataset{obs, categorical_obs, nullptr, nullptr, nullptr, nullptr, n_samples, this->device};
 // print_shap_data(shap_data, this->metadata);
 get_shap_values(this->metadata, edata_cpu, shap_data, &dataset, shap_values);
 dealloc_shap_data(shap_data);
@@ -895,7 +896,7 @@
 float* GBRL::ensemble_shap(const float *obs, const char *categorical_obs, const int n_samples, float *norm, float *base_poly, float *offset){
 valid_tree_idx(0, this->metadata);
 float *shap_values = init_zero_mat((this->metadata->n_num_features + this->metadata->n_cat_features)*this->metadata->output_dim * n_samples);
-dataSet dataset{obs, categorical_obs, nullptr, nullptr, nullptr, n_samples, this->device};
+dataSet dataset{obs, categorical_obs, nullptr, nullptr, nullptr, nullptr, n_samples, this->device};
 ensembleData *edata_cpu = nullptr;
 #ifdef USE_CUDA
 if (this->device == gpu){
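All of these gbrl.cpp call sites change for the same reason: the dataSet aggregate gains a norm_grads member between build_grads and n_samples, so every brace-initializer needs one more argument (the precomputed norm buffer, or nullptr where none exists yet). A sketch of the struct shape the initializers imply; only the field order is directly evidenced by this diff, and the member types, qualifiers, and the device enum are assumptions, since the header is not shown:

// Inferred from the brace-initializers in this diff; actual declarations
// live in a header that is not part of this pull request.
enum deviceType { cpu, gpu };  // assumed; call sites compare device == gpu

struct dataSet {
    const float *obs;              // numerical observations
    const char  *categorical_obs;  // categorical observations
    float       *grads;            // raw gradients
    const float *feature_weights;  // per-feature weights
    float       *build_grads;      // gradients preprocessed for tree building
    float       *norm_grads;       // new in this PR: squared gradient norms
    int          n_samples;
    deviceType   device;
};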
gbrl/src/cpp/math_ops.h: 4 additions & 26 deletions

@@ -75,39 +75,17 @@ inline float squared_norm(const float *vec, const int n_samples){
 return sum;
 }

-inline float cosine_dist(const int *indices, const float *raw_grads, const float *mean, const int n_samples, const int n_cols){
+inline float cosine_dist(const int *indices, const float *raw_grads, const float *mean, const int n_samples, const int n_cols, float squared_norms){
 if (n_samples == 0)
 return 0.0f;
 float n_samples_f = static_cast<float>(n_samples);
 float sum_dot_product = mat_vec_dot_sum(indices, raw_grads, mean, n_samples, n_cols);
-float mean_norm = squared_norm(mean, n_cols);
-float denominator = mean_norm * n_samples_f;
+float mean_norm = norm(mean, n_cols);
+float denominator = mean_norm * sqrtf(squared_norms);
 if (denominator == 0.0f) {
 return 0.0f;
 }
-return (sum_dot_product / sqrt(denominator)) ;
-}
-
-inline float cosine_score(const int *true_indices, const int *false_indices, const float *raw_grads, const float *true_mean, const float *false_mean, const int true_n_samples, const int false_n_samples, const int n_cols){
-float true_numerator = 0.0f, false_numerator = 0.0f;
-float true_n_samples_f = static_cast<float>(true_n_samples), false_n_samples_f = static_cast<float>(false_n_samples);
-if (true_n_samples > 0)
-true_numerator = mat_vec_dot_sum(true_indices, raw_grads, true_mean, true_n_samples, n_cols);
-if (false_n_samples > 0)
-false_numerator = mat_vec_dot_sum(false_indices, raw_grads, false_mean, false_n_samples, n_cols);
-
-float true_mean_norm = squared_norm(true_mean, n_cols);
-float false_mean_norm = squared_norm(false_mean, n_cols);
-float true_denominator = true_mean_norm * true_n_samples_f;
-float false_denominator = false_mean_norm * false_n_samples_f;
-
-float numerator = true_numerator + false_numerator;
-float denominator = true_denominator + false_denominator;
-
-if (denominator == 0.0f) {
-return 0.0f;
-}
-return (numerator / sqrtf(denominator)) ;
+return (sum_dot_product / denominator) * n_samples_f;
 }

 #endif
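Written out, the two return statements compute the following scores for a node S with per-sample gradients g_i, vector mu (the node's mean gradient in the surrounding fitter code; that reading is an inference, not stated in this header), and n_S = n_samples. A LaTeX rendering for comparison:

% Deleted variant: approximates the gradient energy by n_S * ||mu||^2.
\mathrm{score}_{\text{deleted}}(S)
  = \frac{\sum_{i \in S} \langle g_i, \mu \rangle}
         {\sqrt{\lVert \mu \rVert_2^{2} \cdot n_S}}
\qquad
% Restored ("old cosine") variant: uses the true sum of squared
% per-sample norms, precomputed in fitter.cpp and passed in as the
% scalar squared_norms.
\mathrm{score}_{\text{restored}}(S)
  = n_S \cdot \frac{\sum_{i \in S} \langle g_i, \mu \rangle}
                   {\lVert \mu \rVert_2 \,\sqrt{\sum_{i \in S} \lVert g_i \rVert_2^{2}}}

The restored denominator is ||mu|| times the Frobenius norm of the node's stacked gradient matrix, which is why the per-sample squared norms must be materialized up front and threaded through dataSet::norm_grads rather than approximated from the mean alone.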