Skip to content

Commit

Permalink
new Snapshot::new_from() API
Browse files Browse the repository at this point in the history
  • Loading branch information
zachschuermann committed Nov 27, 2024
1 parent 953ceed commit f8bc074
Show file tree
Hide file tree
Showing 2 changed files with 42 additions and 2 deletions.
42 changes: 41 additions & 1 deletion kernel/src/snapshot.rs
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,8 @@ impl Snapshot {
///
/// - `table_root`: url pointing at the table root (where `_delta_log` folder is located)
/// - `engine`: Implementation of [`Engine`] apis.
/// - `version`: target version of the [`Snapshot`]
/// - `version`: target version of the [`Snapshot`]. None will create a snapshot at the latest
/// version of the table.
pub fn try_new(
table_root: Url,
engine: &dyn Engine,
Expand All @@ -71,6 +72,26 @@ impl Snapshot {
Self::try_new_from_log_segment(table_root, log_segment, engine)
}

/// Create a new [`Snapshot`] instance from an existing [`Snapshot`]. This is useful when you
/// already have a [`Snapshot`] lying around and want to do the minimal work to 'update' the
/// snapshot to a later version.
///
/// # Parameters
///
/// - `existing_snapshot`: reference to an existing [`Snapshot`]
/// - `engine`: Implementation of [`Engine`] apis.
/// - `version`: target version of the [`Snapshot`]. None will create a snapshot at the latest
/// version of the table.
pub fn new_from(
existing_snapshot: &Snapshot,
engine: &dyn Engine,
version: Option<Version>,
) -> DeltaResult<Self> {
// TODO(zach): for now we just pass through to the old API. We should instead optimize this
// to avoid replaying overlapping LogSegments.
Self::try_new(existing_snapshot.table_root.clone(), engine, version)
}

/// Create a new [`Snapshot`] instance.
pub(crate) fn try_new_from_log_segment(
location: Url,
Expand Down Expand Up @@ -250,6 +271,25 @@ mod tests {
assert_eq!(snapshot.schema(), &expected);
}

#[test]
fn test_snapshot_new_from() {
let path =
std::fs::canonicalize(PathBuf::from("./tests/data/table-with-dv-small/")).unwrap();
let url = url::Url::from_directory_path(path).unwrap();

let engine = SyncEngine::new();
let old_snapshot = Snapshot::try_new(url, &engine, Some(0)).unwrap();
let snapshot = Snapshot::new_from(&old_snapshot, &engine, Some(0)).unwrap();

let expected =
Protocol::try_new(3, 7, Some(["deletionVectors"]), Some(["deletionVectors"])).unwrap();
assert_eq!(snapshot.protocol(), &expected);

let schema_string = r#"{"type":"struct","fields":[{"name":"value","type":"integer","nullable":true,"metadata":{}}]}"#;
let expected: StructType = serde_json::from_str(schema_string).unwrap();
assert_eq!(snapshot.schema(), &expected);
}

#[test]
fn test_read_table_with_last_checkpoint() {
let path = std::fs::canonicalize(PathBuf::from(
Expand Down
2 changes: 1 addition & 1 deletion kernel/src/table_changes/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,7 @@ impl TableChanges {
// supported for every protocol action in the CDF range.
let start_snapshot =
Snapshot::try_new(table_root.as_url().clone(), engine, Some(start_version))?;
let end_snapshot = Snapshot::try_new(table_root.as_url().clone(), engine, end_version)?;
let end_snapshot = Snapshot::new_from(&start_snapshot, engine, end_version)?;

// Verify CDF is enabled at the beginning and end of the interval to fail early. We must
// still check that CDF is enabled for every metadata action in the CDF range.
Expand Down

0 comments on commit f8bc074

Please sign in to comment.