Skip to content

Commit

Permalink
implement size of stuff
Browse files Browse the repository at this point in the history
  • Loading branch information
wjones127 committed May 29, 2024
1 parent 78a7108 commit 885db39
Show file tree
Hide file tree
Showing 5 changed files with 37 additions and 3 deletions.
3 changes: 2 additions & 1 deletion rust/lance-table/src/rowids.rs
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ mod index;
mod segment;
mod serde;

use deepsize::DeepSizeOf;
// These are the public API.
pub use index::RowIdIndex;
pub use serde::{read_row_ids, write_row_ids};
Expand All @@ -40,7 +41,7 @@ use segment::U64Segment;
/// contiguous or sorted.
///
/// We can make optimizations that assume uniqueness.
#[derive(Debug, Clone)]
#[derive(Debug, Clone, DeepSizeOf)]
pub struct RowIdSequence(Vec<U64Segment>);

impl std::fmt::Display for RowIdSequence {
Expand Down
4 changes: 3 additions & 1 deletion rust/lance-table/src/rowids/bitmap.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
// SPDX-License-Identifier: Apache-2.0
// SPDX-FileCopyrightText: Copyright The Lance Authors

#[derive(PartialEq, Eq, Clone)]
use deepsize::DeepSizeOf;

#[derive(PartialEq, Eq, Clone, DeepSizeOf)]
pub struct Bitmap {
pub data: Vec<u8>,
pub len: usize,
Expand Down
4 changes: 3 additions & 1 deletion rust/lance-table/src/rowids/encoded_array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,12 @@

use std::ops::Range;

use deepsize::DeepSizeOf;

/// Encoded array of u64 values.
///
/// This is an internal data type used as part of row id indices.
#[derive(Debug, Clone, PartialEq, Eq)]
#[derive(Debug, Clone, PartialEq, Eq, DeepSizeOf)]
pub enum EncodedU64Array {
/// u64 values represented as u16 offset from a base value.
///
Expand Down
15 changes: 15 additions & 0 deletions rust/lance-table/src/rowids/index.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@

use std::ops::RangeInclusive;

use deepsize::DeepSizeOf;
use lance_core::utils::address::RowAddress;
use lance_core::{Error, Result};
use rangemap::RangeInclusiveMap;
Expand Down Expand Up @@ -57,6 +58,20 @@ impl RowIdIndex {
}
}

impl DeepSizeOf for RowIdIndex {
    /// Estimates the bytes held by the entries of the wrapped collection.
    ///
    /// Every entry contributes a fixed amount plus whatever the two segments
    /// in its value report through their own `deep_size_of_children`. Only
    /// these per-entry terms are summed; no additional bookkeeping overhead
    /// of the container itself is added.
    fn deep_size_of_children(&self, context: &mut deepsize::Context) -> usize {
        // Fixed cost charged once per entry: two `u64`s (presumably the start
        // and end of the entry's key range; confirm against the map's key
        // type) plus the inline size of the `(U64Segment, U64Segment)` value.
        let per_entry_bytes =
            2 * std::mem::size_of::<u64>() + std::mem::size_of::<(U64Segment, U64Segment)>();

        let mut total_bytes = 0;
        for (_, (row_ids, addresses)) in self.0.iter() {
            // The inline size of both segments is already part of
            // `per_entry_bytes`, so only their children are added here.
            total_bytes += per_entry_bytes
                + row_ids.deep_size_of_children(context)
                + addresses.deep_size_of_children(context);
        }
        total_bytes
    }
}

fn decompose_sequence(
fragment_id: u32,
sequence: &RowIdSequence,
Expand Down
14 changes: 14 additions & 0 deletions rust/lance-table/src/rowids/segment.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@

use std::ops::{Range, RangeInclusive};

use deepsize::DeepSizeOf;

use super::{bitmap::Bitmap, encoded_array::EncodedU64Array};

/// Different ways to represent a sequence of distinct u64s.
Expand Down Expand Up @@ -61,6 +63,18 @@ pub enum U64Segment {
Array(EncodedU64Array),
}

impl DeepSizeOf for U64Segment {
fn deep_size_of_children(&self, context: &mut deepsize::Context) -> usize {
match self {
Self::Range(_) => 0,
Self::RangeWithHoles { holes, .. } => holes.deep_size_of_children(context),
Self::RangeWithBitmap { bitmap, .. } => bitmap.deep_size_of_children(context),
Self::SortedArray(array) => array.deep_size_of_children(context),
Self::Array(array) => array.deep_size_of_children(context),
}
}
}

/// Statistics about a segment of u64s.
#[derive(Debug)]
struct SegmentStats {
Expand Down

0 comments on commit 885db39

Please sign in to comment.