Skip to content

Commit

Permalink
refactor: remove unused imports and the unused `column` parameter of `shuffle_dataset`
Browse files: browse the repository at this point in the history
  • Loading branch information
eddyxu committed Mar 1, 2024
1 parent 6924ed9 commit b19120c
Show file tree
Hide file tree
Showing 6 changed files with 2 additions and 12 deletions.
1 change: 0 additions & 1 deletion rust/lance-index/src/vector/ivf.rs
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,6 @@ use arrow_array::{
};
use arrow_schema::{DataType, Field};
use async_trait::async_trait;
use futures::StreamExt;
use snafu::{location, Location};

pub use builder::IvfBuildParams;
Expand Down
5 changes: 0 additions & 5 deletions rust/lance-index/src/vector/ivf/shuffler.rs
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,6 @@ fn get_temp_dir() -> Result<Path> {
#[allow(clippy::too_many_arguments)]
pub async fn shuffle_dataset(
data: impl RecordBatchStream + Unpin + 'static,
column: &str,
ivf: Arc<dyn crate::vector::ivf::Ivf>,
precomputed_partitions: Option<HashMap<u64, u32>>,
num_partitions: u32,
Expand All @@ -90,8 +89,6 @@ pub async fn shuffle_dataset(
shuffle_partition_concurrency: usize,
precomputed_shuffle_buffers: Option<(Path, Vec<String>)>,
) -> Result<Vec<impl Stream<Item = Result<RecordBatch>>>> {
let column: Arc<str> = column.into();

// TODO: dynamically detect schema from the transforms.
let schema = Arc::new(arrow_schema::Schema::new(vec![
ROW_ID_FIELD.clone(),
Expand Down Expand Up @@ -131,8 +128,6 @@ pub async fn shuffle_dataset(
let stream = data
.zip(repeat_with(move || ivf.clone()))
.map(move |(b, ivf)| {
let col_ref = column.clone();

// If a precomputed_partitions map is provided, use it
// for fast partitioning.
let partition_map = precomputed_partitions
Expand Down
4 changes: 1 addition & 3 deletions rust/lance-index/src/vector/ivf/transform.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,9 +18,7 @@ use std::ops::Range;
use std::sync::Arc;

use arrow_array::types::UInt32Type;
use arrow_array::{
cast::AsArray, Array, ArrowPrimitiveType, FixedSizeListArray, RecordBatch, UInt32Array,
};
use arrow_array::{cast::AsArray, Array, ArrowPrimitiveType, RecordBatch, UInt32Array};
use arrow_schema::Field;
use futures::{stream, StreamExt};
use log::info;
Expand Down
2 changes: 1 addition & 1 deletion rust/lance-linalg/src/kmeans.rs
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ use crate::{
distance::{
dot_distance,
l2::{l2, l2_distance_batch, L2},
Cosine, Dot, MetricType,
Dot, MetricType,
},
kernels::argmin_value,
matrix::MatrixView,
Expand Down
1 change: 0 additions & 1 deletion rust/lance/src/index/vector/ivf.rs
Original file line number Diff line number Diff line change
Expand Up @@ -248,7 +248,6 @@ pub(crate) async fn optimize_vector_indices(
Some(
shuffle_dataset(
stream,
vector_column,
ivf,
None,
first_idx.ivf.num_partitions() as u32,
Expand Down
1 change: 0 additions & 1 deletion rust/lance/src/index/vector/ivf/builder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,6 @@ pub(super) async fn build_partitions(

let stream = shuffle_dataset(
data,
column,
ivf_model,
precomputed_partitons,
ivf.num_partitions() as u32,
Expand Down

0 comments on commit b19120c

Please sign in to comment.