# Patch: adds the `brute_force_bacalhau` vector_search algorithm (five licence variants of one
# implementation plus a mod.rs), registers it as `c004_a014`, edits the template.rs header and
# adds the compiled .wasm. Text before the first "diff --git" line is ignored by patch tools.
#
# Formatting repaired: one diff line per text line, and the generic arguments that had been
# stripped from the Rust hunks are restored (`sum::<f32>()`, `Vec<Vec<f32>>`,
# `Result<Option<Solution>>`, `Option<usize>`, `Option<CudaKernel>`, `Arc<CudaDevice>`).
# NOTE(review): whitespace of blank context lines and of the removed template.rs list items was
# lost and is reconstructed from the hunk line counts - confirm against the repository.
#
# Review notes on the added Rust (content intentionally left as submitted):
#   * `l2_norm` is defined in every variant but never called.
#   * `euclidean_distance_with_precomputed_norm` takes the square root of a value that rounding
#     can push below zero, which yields NaN; consider clamping with `.max(0.0)` first.
#   * `sum_norms_sq` accumulates `norm_sq.sqrt()`, i.e. plain norms, despite its name.
#   * `cuda_solve_challenge` ignores `dev` and `funcs` and delegates to `solve_challenge`.
diff --git a/tig-algorithms/src/vector_search/brute_force_bacalhau/benchmarker_outbound.rs b/tig-algorithms/src/vector_search/brute_force_bacalhau/benchmarker_outbound.rs
new file mode 100644
index 00000000..8a58f5cb
--- /dev/null
+++ b/tig-algorithms/src/vector_search/brute_force_bacalhau/benchmarker_outbound.rs
@@ -0,0 +1,113 @@
+/*!
+Copyright 2024 Louis Silva
+
+Licensed under the TIG Benchmarker Outbound Game License v1.0 (the "License"); you
+may not use this file except in compliance with the License. You may obtain a copy
+of the License at
+
+https://github.com/tig-foundation/tig-monorepo/tree/main/docs/licenses
+
+Unless required by applicable law or agreed to in writing, software distributed
+under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+CONDITIONS OF ANY KIND, either express or implied. See the License for the specific
+language governing permissions and limitations under the License.
+ */
+
+use anyhow::Result;
+
+use tig_challenges::vector_search::*;
+
+#[inline]
+fn l2_norm(x: &[f32]) -> f32 {
+    x.iter().map(|&val| val * val).sum::<f32>().sqrt()
+}
+
+#[inline]
+fn euclidean_distance_with_precomputed_norm(
+    a_norm_sq: f32,
+    b_norm_sq: f32,
+    ab_dot_product: f32
+) -> f32 {
+    (a_norm_sq + b_norm_sq - 2.0 * ab_dot_product).sqrt()
+}
+
+pub fn solve_challenge(challenge: &Challenge) -> Result<Option<Solution>> {
+    let vector_database: &Vec<Vec<f32>> = &challenge.vector_database;
+    let query_vectors: &Vec<Vec<f32>> = &challenge.query_vectors;
+    let max_distance: f32 = challenge.max_distance;
+
+    let mut indexes: Vec<usize> = Vec::with_capacity(query_vectors.len());
+    let mut vector_norms_sq: Vec<f32> = Vec::with_capacity(vector_database.len());
+
+    let mut sum_norms_sq: f32 = 0.0;
+    let mut sum_squares: f32 = 0.0;
+
+    for vector in vector_database {
+        let norm_sq: f32 = vector.iter().map(|&val| val * val).sum();
+        sum_norms_sq += norm_sq.sqrt();
+        sum_squares += norm_sq;
+        vector_norms_sq.push(norm_sq);
+    }
+
+    let vector_norms_len: f32 = vector_norms_sq.len() as f32;
+    let std_dev: f32 = ((sum_squares / vector_norms_len) - (sum_norms_sq / vector_norms_len).powi(2)).sqrt();
+    let norm_threshold: f32 = 2.0 * std_dev;
+
+    for query in query_vectors {
+        let query_norm_sq: f32 = query.iter().map(|&val| val * val).sum();
+
+        let mut closest_index: Option<usize> = None;
+        let mut closest_distance: f32 = f32::MAX;
+
+        for (idx, vector) in vector_database.iter().enumerate() {
+            let vector_norm_sq = vector_norms_sq[idx];
+            if ((vector_norm_sq.sqrt() - query_norm_sq.sqrt()).abs()) > norm_threshold {
+                continue;
+            }
+
+            let ab_dot_product: f32 = query.iter().zip(vector).map(|(&x1, &x2)| x1 * x2).sum();
+            let distance: f32 = euclidean_distance_with_precomputed_norm(
+                query_norm_sq,
+                vector_norm_sq,
+                ab_dot_product,
+            );
+
+            if distance <= max_distance {
+                closest_index = Some(idx);
+                break; // Early exit
+            } else if distance < closest_distance {
+                closest_index = Some(idx);
+                closest_distance = distance;
+            }
+        }
+
+        if let Some(index) = closest_index {
+            indexes.push(index);
+        } else {
+            return Ok(None);
+        }
+    }
+
+    Ok(Some(Solution { indexes }))
+}
+#[cfg(feature = "cuda")]
+mod gpu_optimisation {
+    use super::*;
+    use cudarc::driver::*;
+    use std::{collections::HashMap, sync::Arc};
+    use tig_challenges::CudaKernel;
+
+    // set KERNEL to None if algorithm only has a CPU implementation
+    pub const KERNEL: Option<CudaKernel> = None;
+
+    // Important! your GPU and CPU version of the algorithm should return the same result
+    pub fn cuda_solve_challenge(
+        challenge: &Challenge,
+        dev: &Arc<CudaDevice>,
+        mut funcs: HashMap<&'static str, CudaFunction>,
+    ) -> anyhow::Result<Option<Solution>> {
+        solve_challenge(challenge)
+    }
+}
+#[cfg(feature = "cuda")]
+pub use gpu_optimisation::{cuda_solve_challenge, KERNEL};
diff --git a/tig-algorithms/src/vector_search/brute_force_bacalhau/commercial.rs b/tig-algorithms/src/vector_search/brute_force_bacalhau/commercial.rs
new file mode 100644
index 00000000..bacee69c
--- /dev/null
+++ b/tig-algorithms/src/vector_search/brute_force_bacalhau/commercial.rs
@@ -0,0 +1,113 @@
+/*!
+Copyright 2024 Louis Silva
+
+Licensed under the TIG Commercial License v1.0 (the "License"); you
+may not use this file except in compliance with the License. You may obtain a copy
+of the License at
+
+https://github.com/tig-foundation/tig-monorepo/tree/main/docs/licenses
+
+Unless required by applicable law or agreed to in writing, software distributed
+under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+CONDITIONS OF ANY KIND, either express or implied. See the License for the specific
+language governing permissions and limitations under the License.
+ */
+
+use anyhow::Result;
+
+use tig_challenges::vector_search::*;
+
+#[inline]
+fn l2_norm(x: &[f32]) -> f32 {
+    x.iter().map(|&val| val * val).sum::<f32>().sqrt()
+}
+
+#[inline]
+fn euclidean_distance_with_precomputed_norm(
+    a_norm_sq: f32,
+    b_norm_sq: f32,
+    ab_dot_product: f32
+) -> f32 {
+    (a_norm_sq + b_norm_sq - 2.0 * ab_dot_product).sqrt()
+}
+
+pub fn solve_challenge(challenge: &Challenge) -> Result<Option<Solution>> {
+    let vector_database: &Vec<Vec<f32>> = &challenge.vector_database;
+    let query_vectors: &Vec<Vec<f32>> = &challenge.query_vectors;
+    let max_distance: f32 = challenge.max_distance;
+
+    let mut indexes: Vec<usize> = Vec::with_capacity(query_vectors.len());
+    let mut vector_norms_sq: Vec<f32> = Vec::with_capacity(vector_database.len());
+
+    let mut sum_norms_sq: f32 = 0.0;
+    let mut sum_squares: f32 = 0.0;
+
+    for vector in vector_database {
+        let norm_sq: f32 = vector.iter().map(|&val| val * val).sum();
+        sum_norms_sq += norm_sq.sqrt();
+        sum_squares += norm_sq;
+        vector_norms_sq.push(norm_sq);
+    }
+
+    let vector_norms_len: f32 = vector_norms_sq.len() as f32;
+    let std_dev: f32 = ((sum_squares / vector_norms_len) - (sum_norms_sq / vector_norms_len).powi(2)).sqrt();
+    let norm_threshold: f32 = 2.0 * std_dev;
+
+    for query in query_vectors {
+        let query_norm_sq: f32 = query.iter().map(|&val| val * val).sum();
+
+        let mut closest_index: Option<usize> = None;
+        let mut closest_distance: f32 = f32::MAX;
+
+        for (idx, vector) in vector_database.iter().enumerate() {
+            let vector_norm_sq = vector_norms_sq[idx];
+            if ((vector_norm_sq.sqrt() - query_norm_sq.sqrt()).abs()) > norm_threshold {
+                continue;
+            }
+
+            let ab_dot_product: f32 = query.iter().zip(vector).map(|(&x1, &x2)| x1 * x2).sum();
+            let distance: f32 = euclidean_distance_with_precomputed_norm(
+                query_norm_sq,
+                vector_norm_sq,
+                ab_dot_product,
+            );
+
+            if distance <= max_distance {
+                closest_index = Some(idx);
+                break; // Early exit
+            } else if distance < closest_distance {
+                closest_index = Some(idx);
+                closest_distance = distance;
+            }
+        }
+
+        if let Some(index) = closest_index {
+            indexes.push(index);
+        } else {
+            return Ok(None);
+        }
+    }
+
+    Ok(Some(Solution { indexes }))
+}
+#[cfg(feature = "cuda")]
+mod gpu_optimisation {
+    use super::*;
+    use cudarc::driver::*;
+    use std::{collections::HashMap, sync::Arc};
+    use tig_challenges::CudaKernel;
+
+    // set KERNEL to None if algorithm only has a CPU implementation
+    pub const KERNEL: Option<CudaKernel> = None;
+
+    // Important! your GPU and CPU version of the algorithm should return the same result
+    pub fn cuda_solve_challenge(
+        challenge: &Challenge,
+        dev: &Arc<CudaDevice>,
+        mut funcs: HashMap<&'static str, CudaFunction>,
+    ) -> anyhow::Result<Option<Solution>> {
+        solve_challenge(challenge)
+    }
+}
+#[cfg(feature = "cuda")]
+pub use gpu_optimisation::{cuda_solve_challenge, KERNEL};
diff --git a/tig-algorithms/src/vector_search/brute_force_bacalhau/inbound.rs b/tig-algorithms/src/vector_search/brute_force_bacalhau/inbound.rs
new file mode 100644
index 00000000..fd0bf1d3
--- /dev/null
+++ b/tig-algorithms/src/vector_search/brute_force_bacalhau/inbound.rs
@@ -0,0 +1,113 @@
+/*!
+Copyright 2024 Louis Silva
+
+Licensed under the TIG Inbound Game License v1.0 or (at your option) any later
+version (the "License"); you may not use this file except in compliance with the
+License. You may obtain a copy of the License at
+
+https://github.com/tig-foundation/tig-monorepo/tree/main/docs/licenses
+
+Unless required by applicable law or agreed to in writing, software distributed
+under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+CONDITIONS OF ANY KIND, either express or implied. See the License for the specific
+language governing permissions and limitations under the License.
+ */
+
+use anyhow::Result;
+
+use tig_challenges::vector_search::*;
+
+#[inline]
+fn l2_norm(x: &[f32]) -> f32 {
+    x.iter().map(|&val| val * val).sum::<f32>().sqrt()
+}
+
+#[inline]
+fn euclidean_distance_with_precomputed_norm(
+    a_norm_sq: f32,
+    b_norm_sq: f32,
+    ab_dot_product: f32
+) -> f32 {
+    (a_norm_sq + b_norm_sq - 2.0 * ab_dot_product).sqrt()
+}
+
+pub fn solve_challenge(challenge: &Challenge) -> Result<Option<Solution>> {
+    let vector_database: &Vec<Vec<f32>> = &challenge.vector_database;
+    let query_vectors: &Vec<Vec<f32>> = &challenge.query_vectors;
+    let max_distance: f32 = challenge.max_distance;
+
+    let mut indexes: Vec<usize> = Vec::with_capacity(query_vectors.len());
+    let mut vector_norms_sq: Vec<f32> = Vec::with_capacity(vector_database.len());
+
+    let mut sum_norms_sq: f32 = 0.0;
+    let mut sum_squares: f32 = 0.0;
+
+    for vector in vector_database {
+        let norm_sq: f32 = vector.iter().map(|&val| val * val).sum();
+        sum_norms_sq += norm_sq.sqrt();
+        sum_squares += norm_sq;
+        vector_norms_sq.push(norm_sq);
+    }
+
+    let vector_norms_len: f32 = vector_norms_sq.len() as f32;
+    let std_dev: f32 = ((sum_squares / vector_norms_len) - (sum_norms_sq / vector_norms_len).powi(2)).sqrt();
+    let norm_threshold: f32 = 2.0 * std_dev;
+
+    for query in query_vectors {
+        let query_norm_sq: f32 = query.iter().map(|&val| val * val).sum();
+
+        let mut closest_index: Option<usize> = None;
+        let mut closest_distance: f32 = f32::MAX;
+
+        for (idx, vector) in vector_database.iter().enumerate() {
+            let vector_norm_sq = vector_norms_sq[idx];
+            if ((vector_norm_sq.sqrt() - query_norm_sq.sqrt()).abs()) > norm_threshold {
+                continue;
+            }
+
+            let ab_dot_product: f32 = query.iter().zip(vector).map(|(&x1, &x2)| x1 * x2).sum();
+            let distance: f32 = euclidean_distance_with_precomputed_norm(
+                query_norm_sq,
+                vector_norm_sq,
+                ab_dot_product,
+            );
+
+            if distance <= max_distance {
+                closest_index = Some(idx);
+                break; // Early exit
+            } else if distance < closest_distance {
+                closest_index = Some(idx);
+                closest_distance = distance;
+            }
+        }
+
+        if let Some(index) = closest_index {
+            indexes.push(index);
+        } else {
+            return Ok(None);
+        }
+    }
+
+    Ok(Some(Solution { indexes }))
+}
+#[cfg(feature = "cuda")]
+mod gpu_optimisation {
+    use super::*;
+    use cudarc::driver::*;
+    use std::{collections::HashMap, sync::Arc};
+    use tig_challenges::CudaKernel;
+
+    // set KERNEL to None if algorithm only has a CPU implementation
+    pub const KERNEL: Option<CudaKernel> = None;
+
+    // Important! your GPU and CPU version of the algorithm should return the same result
+    pub fn cuda_solve_challenge(
+        challenge: &Challenge,
+        dev: &Arc<CudaDevice>,
+        mut funcs: HashMap<&'static str, CudaFunction>,
+    ) -> anyhow::Result<Option<Solution>> {
+        solve_challenge(challenge)
+    }
+}
+#[cfg(feature = "cuda")]
+pub use gpu_optimisation::{cuda_solve_challenge, KERNEL};
diff --git a/tig-algorithms/src/vector_search/brute_force_bacalhau/innovator_outbound.rs b/tig-algorithms/src/vector_search/brute_force_bacalhau/innovator_outbound.rs
new file mode 100644
index 00000000..a0996b69
--- /dev/null
+++ b/tig-algorithms/src/vector_search/brute_force_bacalhau/innovator_outbound.rs
@@ -0,0 +1,113 @@
+/*!
+Copyright 2024 Louis Silva
+
+Licensed under the TIG Innovator Outbound Game License v1.0 (the "License"); you
+may not use this file except in compliance with the License. You may obtain a copy
+of the License at
+
+https://github.com/tig-foundation/tig-monorepo/tree/main/docs/licenses
+
+Unless required by applicable law or agreed to in writing, software distributed
+under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+CONDITIONS OF ANY KIND, either express or implied. See the License for the specific
+language governing permissions and limitations under the License.
+ */
+
+use anyhow::Result;
+
+use tig_challenges::vector_search::*;
+
+#[inline]
+fn l2_norm(x: &[f32]) -> f32 {
+    x.iter().map(|&val| val * val).sum::<f32>().sqrt()
+}
+
+#[inline]
+fn euclidean_distance_with_precomputed_norm(
+    a_norm_sq: f32,
+    b_norm_sq: f32,
+    ab_dot_product: f32
+) -> f32 {
+    (a_norm_sq + b_norm_sq - 2.0 * ab_dot_product).sqrt()
+}
+
+pub fn solve_challenge(challenge: &Challenge) -> Result<Option<Solution>> {
+    let vector_database: &Vec<Vec<f32>> = &challenge.vector_database;
+    let query_vectors: &Vec<Vec<f32>> = &challenge.query_vectors;
+    let max_distance: f32 = challenge.max_distance;
+
+    let mut indexes: Vec<usize> = Vec::with_capacity(query_vectors.len());
+    let mut vector_norms_sq: Vec<f32> = Vec::with_capacity(vector_database.len());
+
+    let mut sum_norms_sq: f32 = 0.0;
+    let mut sum_squares: f32 = 0.0;
+
+    for vector in vector_database {
+        let norm_sq: f32 = vector.iter().map(|&val| val * val).sum();
+        sum_norms_sq += norm_sq.sqrt();
+        sum_squares += norm_sq;
+        vector_norms_sq.push(norm_sq);
+    }
+
+    let vector_norms_len: f32 = vector_norms_sq.len() as f32;
+    let std_dev: f32 = ((sum_squares / vector_norms_len) - (sum_norms_sq / vector_norms_len).powi(2)).sqrt();
+    let norm_threshold: f32 = 2.0 * std_dev;
+
+    for query in query_vectors {
+        let query_norm_sq: f32 = query.iter().map(|&val| val * val).sum();
+
+        let mut closest_index: Option<usize> = None;
+        let mut closest_distance: f32 = f32::MAX;
+
+        for (idx, vector) in vector_database.iter().enumerate() {
+            let vector_norm_sq = vector_norms_sq[idx];
+            if ((vector_norm_sq.sqrt() - query_norm_sq.sqrt()).abs()) > norm_threshold {
+                continue;
+            }
+
+            let ab_dot_product: f32 = query.iter().zip(vector).map(|(&x1, &x2)| x1 * x2).sum();
+            let distance: f32 = euclidean_distance_with_precomputed_norm(
+                query_norm_sq,
+                vector_norm_sq,
+                ab_dot_product,
+            );
+
+            if distance <= max_distance {
+                closest_index = Some(idx);
+                break; // Early exit
+            } else if distance < closest_distance {
+                closest_index = Some(idx);
+                closest_distance = distance;
+            }
+        }
+
+        if let Some(index) = closest_index {
+            indexes.push(index);
+        } else {
+            return Ok(None);
+        }
+    }
+
+    Ok(Some(Solution { indexes }))
+}
+#[cfg(feature = "cuda")]
+mod gpu_optimisation {
+    use super::*;
+    use cudarc::driver::*;
+    use std::{collections::HashMap, sync::Arc};
+    use tig_challenges::CudaKernel;
+
+    // set KERNEL to None if algorithm only has a CPU implementation
+    pub const KERNEL: Option<CudaKernel> = None;
+
+    // Important! your GPU and CPU version of the algorithm should return the same result
+    pub fn cuda_solve_challenge(
+        challenge: &Challenge,
+        dev: &Arc<CudaDevice>,
+        mut funcs: HashMap<&'static str, CudaFunction>,
+    ) -> anyhow::Result<Option<Solution>> {
+        solve_challenge(challenge)
+    }
+}
+#[cfg(feature = "cuda")]
+pub use gpu_optimisation::{cuda_solve_challenge, KERNEL};
diff --git a/tig-algorithms/src/vector_search/brute_force_bacalhau/mod.rs b/tig-algorithms/src/vector_search/brute_force_bacalhau/mod.rs
new file mode 100644
index 00000000..fcec9672
--- /dev/null
+++ b/tig-algorithms/src/vector_search/brute_force_bacalhau/mod.rs
@@ -0,0 +1,4 @@
+mod benchmarker_outbound;
+pub use benchmarker_outbound::solve_challenge;
+#[cfg(feature = "cuda")]
+pub use benchmarker_outbound::{cuda_solve_challenge, KERNEL};
\ No newline at end of file
diff --git a/tig-algorithms/src/vector_search/brute_force_bacalhau/open_data.rs b/tig-algorithms/src/vector_search/brute_force_bacalhau/open_data.rs
new file mode 100644
index 00000000..c55800e4
--- /dev/null
+++ b/tig-algorithms/src/vector_search/brute_force_bacalhau/open_data.rs
@@ -0,0 +1,113 @@
+/*!
+Copyright 2024 Louis Silva
+
+Licensed under the TIG Open Data License v1.0 or (at your option) any later version
+(the "License"); you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+https://github.com/tig-foundation/tig-monorepo/tree/main/docs/licenses
+
+Unless required by applicable law or agreed to in writing, software distributed
+under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+CONDITIONS OF ANY KIND, either express or implied. See the License for the specific
+language governing permissions and limitations under the License.
+ */
+
+use anyhow::Result;
+
+use tig_challenges::vector_search::*;
+
+#[inline]
+fn l2_norm(x: &[f32]) -> f32 {
+    x.iter().map(|&val| val * val).sum::<f32>().sqrt()
+}
+
+#[inline]
+fn euclidean_distance_with_precomputed_norm(
+    a_norm_sq: f32,
+    b_norm_sq: f32,
+    ab_dot_product: f32
+) -> f32 {
+    (a_norm_sq + b_norm_sq - 2.0 * ab_dot_product).sqrt()
+}
+
+pub fn solve_challenge(challenge: &Challenge) -> Result<Option<Solution>> {
+    let vector_database: &Vec<Vec<f32>> = &challenge.vector_database;
+    let query_vectors: &Vec<Vec<f32>> = &challenge.query_vectors;
+    let max_distance: f32 = challenge.max_distance;
+
+    let mut indexes: Vec<usize> = Vec::with_capacity(query_vectors.len());
+    let mut vector_norms_sq: Vec<f32> = Vec::with_capacity(vector_database.len());
+
+    let mut sum_norms_sq: f32 = 0.0;
+    let mut sum_squares: f32 = 0.0;
+
+    for vector in vector_database {
+        let norm_sq: f32 = vector.iter().map(|&val| val * val).sum();
+        sum_norms_sq += norm_sq.sqrt();
+        sum_squares += norm_sq;
+        vector_norms_sq.push(norm_sq);
+    }
+
+    let vector_norms_len: f32 = vector_norms_sq.len() as f32;
+    let std_dev: f32 = ((sum_squares / vector_norms_len) - (sum_norms_sq / vector_norms_len).powi(2)).sqrt();
+    let norm_threshold: f32 = 2.0 * std_dev;
+
+    for query in query_vectors {
+        let query_norm_sq: f32 = query.iter().map(|&val| val * val).sum();
+
+        let mut closest_index: Option<usize> = None;
+        let mut closest_distance: f32 = f32::MAX;
+
+        for (idx, vector) in vector_database.iter().enumerate() {
+            let vector_norm_sq = vector_norms_sq[idx];
+            if ((vector_norm_sq.sqrt() - query_norm_sq.sqrt()).abs()) > norm_threshold {
+                continue;
+            }
+
+            let ab_dot_product: f32 = query.iter().zip(vector).map(|(&x1, &x2)| x1 * x2).sum();
+            let distance: f32 = euclidean_distance_with_precomputed_norm(
+                query_norm_sq,
+                vector_norm_sq,
+                ab_dot_product,
+            );
+
+            if distance <= max_distance {
+                closest_index = Some(idx);
+                break; // Early exit
+            } else if distance < closest_distance {
+                closest_index = Some(idx);
+                closest_distance = distance;
+            }
+        }
+
+        if let Some(index) = closest_index {
+            indexes.push(index);
+        } else {
+            return Ok(None);
+        }
+    }
+
+    Ok(Some(Solution { indexes }))
+}
+#[cfg(feature = "cuda")]
+mod gpu_optimisation {
+    use super::*;
+    use cudarc::driver::*;
+    use std::{collections::HashMap, sync::Arc};
+    use tig_challenges::CudaKernel;
+
+    // set KERNEL to None if algorithm only has a CPU implementation
+    pub const KERNEL: Option<CudaKernel> = None;
+
+    // Important! your GPU and CPU version of the algorithm should return the same result
+    pub fn cuda_solve_challenge(
+        challenge: &Challenge,
+        dev: &Arc<CudaDevice>,
+        mut funcs: HashMap<&'static str, CudaFunction>,
+    ) -> anyhow::Result<Option<Solution>> {
+        solve_challenge(challenge)
+    }
+}
+#[cfg(feature = "cuda")]
+pub use gpu_optimisation::{cuda_solve_challenge, KERNEL};
diff --git a/tig-algorithms/src/vector_search/mod.rs b/tig-algorithms/src/vector_search/mod.rs
index ae3472ac..4990304e 100644
--- a/tig-algorithms/src/vector_search/mod.rs
+++ b/tig-algorithms/src/vector_search/mod.rs
@@ -24,7 +24,8 @@
 
 // c004_a013
 
-// c004_a014
+pub mod brute_force_bacalhau;
+pub use brute_force_bacalhau as c004_a014;
 
 // c004_a015
 
diff --git a/tig-algorithms/src/vector_search/template.rs b/tig-algorithms/src/vector_search/template.rs
index eddf8a0e..0f5fa1e2 100644
--- a/tig-algorithms/src/vector_search/template.rs
+++ b/tig-algorithms/src/vector_search/template.rs
@@ -1,11 +1,7 @@
 /*!
-Copyright [year copyright work created] [name of copyright owner]
+Copyright [yyyy] [name of copyright owner]
 
-Identity of Submitter [name of person or entity that submits the Work to TIG]
-
-UAI [UAI (if applicable)]
-
-Licensed under the TIG Inbound Game License v2.0 or (at your option) any later
+Licensed under the TIG Inbound Game License v1.0 or (at your option) any later
 version (the "License"); you may not use this file except in compliance with the
 License. You may obtain a copy of the License at
 
@@ -17,24 +13,6 @@ CONDITIONS OF ANY KIND, either express or implied. See the License for the speci
 language governing permissions and limitations under the License.
  */
 
-// REMOVE BELOW SECTION IF UNUSED
-/*
-REFERENCES AND ACKNOWLEDGMENTS
-
-This implementation is based on or inspired by existing work. Citations and
-acknowledgments below:
-
-1. Academic Papers:
-   - [Author(s), "Paper Title", DOI (if available)]
-
-2. Code References:
-   - [Author(s), URL]
-
-3. Other:
-   - [Author(s), Details]
-
-*/
-
 // TIG's UI uses the pattern `tig_challenges::` to automatically detect your algorithm's challenge
 use anyhow::{anyhow, Result};
 use tig_challenges::vector_search::{Challenge, Solution};
diff --git a/tig-algorithms/wasm/vector_search/brute_force_bacalhau.wasm b/tig-algorithms/wasm/vector_search/brute_force_bacalhau.wasm
new file mode 100644
index 00000000..9c34e155
Binary files /dev/null and b/tig-algorithms/wasm/vector_search/brute_force_bacalhau.wasm differ