Skip to content

Commit

Permalink
feat!(ffi): remove visit_snapshot_schema, add logical_schema (#709)
Browse files Browse the repository at this point in the history
## What changes are proposed in this pull request?
This PR removes the old `visit_snapshot_schema` introduced in #683 - we
should just go ahead and do the 'right thing' with having a
`visit_schema` (introduced in the other PR) and a `logical_schema()`
function (added here) in order to facilitate visiting the snapshot
schema. Additionally I've moved the schema-related things up from `scan`
module to top-level in ffi crate. Exact changes listed below; this PR
updates tests/examples to leverage the new changes.

### This PR affects the following public APIs

1. Remove `visit_snapshot_schema()` API
2. Add a new `logical_schema(snapshot)` API so you can get the schema of
a snapshot and use the `visit_schema` directly
3. Renames `free_global_read_schema` to just `free_schema`
4. Moves `SharedSchema` and `free_schema` up from `mod scan` into
top-level `ffi` crate.


## How was this change tested?
existing UT
  • Loading branch information
zachschuermann authored Feb 21, 2025
1 parent baa3fc3 commit ca18e7f
Show file tree
Hide file tree
Showing 6 changed files with 37 additions and 37 deletions.
4 changes: 3 additions & 1 deletion ffi/examples/read-table/schema.h
Original file line number Diff line number Diff line change
Expand Up @@ -273,13 +273,15 @@ void print_schema(SharedSnapshot* snapshot)
.visit_timestamp = visit_timestamp,
.visit_timestamp_ntz = visit_timestamp_ntz,
};
uintptr_t schema_list_id = visit_snapshot_schema(snapshot, &visitor);
SharedSchema* schema = logical_schema(snapshot);
uintptr_t schema_list_id = visit_schema(schema, &visitor);
#ifdef VERBOSE
printf("Schema returned in list %" PRIxPTR "\n", schema_list_id);
#endif
print_diag("Done building schema\n");
printf("Schema:\n");
print_list(&builder, schema_list_id, 0, 0);
printf("\n");
free_schema(schema);
free_builder(builder);
}
8 changes: 4 additions & 4 deletions ffi/src/engine_funcs.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,17 +2,17 @@
use std::sync::Arc;

use delta_kernel::schema::{DataType, Schema, SchemaRef};
use delta_kernel::{
schema::{DataType, Schema, SchemaRef},
DeltaResult, EngineData, Expression, ExpressionEvaluator, FileDataReadResultIterator,
};
use delta_kernel_ffi_macros::handle_descriptor;
use tracing::debug;
use url::Url;

use crate::{
scan::SharedSchema, ExclusiveEngineData, ExternEngine, ExternResult, IntoExternResult,
KernelStringSlice, NullableCvoid, SharedExternEngine, TryFromStringSlice,
ExclusiveEngineData, ExternEngine, ExternResult, IntoExternResult, KernelStringSlice,
NullableCvoid, SharedExternEngine, SharedSchema, TryFromStringSlice,
};

use super::handle::Handle;
Expand Down Expand Up @@ -209,7 +209,7 @@ fn evaluate_impl(
#[cfg(test)]
mod tests {
use super::{free_evaluator, get_evaluator};
use crate::{free_engine, handle::Handle, scan::SharedSchema, tests::get_default_engine};
use crate::{free_engine, handle::Handle, tests::get_default_engine, SharedSchema};
use delta_kernel::{
schema::{DataType, StructField, StructType},
Expression,
Expand Down
24 changes: 24 additions & 0 deletions ffi/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ use std::sync::Arc;
use tracing::debug;
use url::Url;

use delta_kernel::schema::Schema;
use delta_kernel::snapshot::Snapshot;
use delta_kernel::{DeltaResult, Engine, EngineData, Table};
use delta_kernel_ffi_macros::handle_descriptor;
Expand Down Expand Up @@ -561,6 +562,9 @@ pub unsafe extern "C" fn free_engine(engine: Handle<SharedExternEngine>) {
engine.drop_handle();
}

#[handle_descriptor(target=Schema, mutable=false, sized=true)]
pub struct SharedSchema;

#[handle_descriptor(target=Snapshot, mutable=false, sized=true)]
pub struct SharedSnapshot;

Expand Down Expand Up @@ -607,6 +611,26 @@ pub unsafe extern "C" fn version(snapshot: Handle<SharedSnapshot>) -> u64 {
snapshot.version()
}

/// Get the logical schema of the specified snapshot
///
/// # Safety
///
/// Caller is responsible for passing a valid snapshot handle.
#[no_mangle]
pub unsafe extern "C" fn logical_schema(snapshot: Handle<SharedSnapshot>) -> Handle<SharedSchema> {
let snapshot = unsafe { snapshot.as_ref() };
Arc::new(snapshot.schema().clone()).into()
}

/// Free a schema
///
/// # Safety
/// Engine is responsible for providing a valid schema handle.
#[no_mangle]
pub unsafe extern "C" fn free_schema(schema: Handle<SharedSchema>) {
schema.drop_handle();
}

/// Get the resolved root of the table. This should be used in any future calls that require
/// constructing a path
///
Expand Down
15 changes: 2 additions & 13 deletions ffi/src/scan.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@ use std::sync::{Arc, Mutex};

use delta_kernel::scan::state::{visit_scan_files, DvInfo, GlobalScanState};
use delta_kernel::scan::{Scan, ScanData};
use delta_kernel::schema::Schema;
use delta_kernel::snapshot::Snapshot;
use delta_kernel::{DeltaResult, Error, Expression, ExpressionRef};
use delta_kernel_ffi_macros::handle_descriptor;
Expand All @@ -19,7 +18,8 @@ use crate::expressions::SharedExpression;
use crate::{
kernel_string_slice, AllocateStringFn, ExclusiveEngineData, ExternEngine, ExternResult,
IntoExternResult, KernelBoolSlice, KernelRowIndexArray, KernelStringSlice, NullableCvoid,
SharedExternEngine, SharedSnapshot, StringIter, StringSliceIterator, TryFromStringSlice,
SharedExternEngine, SharedSchema, SharedSnapshot, StringIter, StringSliceIterator,
TryFromStringSlice,
};

use super::handle::Handle;
Expand Down Expand Up @@ -71,8 +71,6 @@ fn scan_impl(

#[handle_descriptor(target=GlobalScanState, mutable=false, sized=true)]
pub struct SharedGlobalScanState;
#[handle_descriptor(target=Schema, mutable=false, sized=true)]
pub struct SharedSchema;

/// Get the global state for a scan. See the docs for [`delta_kernel::scan::state::GlobalScanState`]
/// for more information.
Expand Down Expand Up @@ -113,15 +111,6 @@ pub unsafe extern "C" fn get_global_logical_schema(
state.logical_schema.clone().into()
}

/// Free a schema
///
/// # Safety
/// Engine is responsible for providing a valid schema obtained via [`get_global_read_schema`]
#[no_mangle]
pub unsafe extern "C" fn free_schema(schema: Handle<SharedSchema>) {
schema.drop_handle();
}

/// Get a count of the number of partition columns for this scan
///
/// # Safety
Expand Down
22 changes: 3 additions & 19 deletions ffi/src/schema.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
use std::os::raw::c_void;

use crate::scan::{CStringMap, SharedSchema};
use crate::{handle::Handle, kernel_string_slice, KernelStringSlice, SharedSnapshot};
use crate::handle::Handle;
use crate::scan::CStringMap;
use crate::{kernel_string_slice, KernelStringSlice, SharedSchema};
use delta_kernel::schema::{ArrayType, DataType, MapType, PrimitiveType, StructType};

/// The `EngineSchemaVisitor` defines a visitor system to allow engines to build their own
Expand Down Expand Up @@ -192,23 +193,6 @@ pub struct EngineSchemaVisitor {
),
}

/// Visit the schema of the passed `SnapshotHandle`, using the provided `visitor`. See the
/// documentation of [`EngineSchemaVisitor`] for a description of how this visitor works.
///
/// This method returns the id of the list allocated to hold the top level schema columns.
///
/// # Safety
///
/// Caller is responsible for passing a valid snapshot handle and schema visitor.
#[no_mangle]
pub unsafe extern "C" fn visit_snapshot_schema(
snapshot: Handle<SharedSnapshot>,
visitor: &mut EngineSchemaVisitor,
) -> usize {
let snapshot = unsafe { snapshot.as_ref() };
visit_schema_impl(snapshot.schema(), visitor)
}

/// Visit the given `schema` using the provided `visitor`. See the documentation of
/// [`EngineSchemaVisitor`] for a description of how this visitor works.
///
Expand Down
1 change: 1 addition & 0 deletions kernel/src/snapshot.rs
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,7 @@ impl Snapshot {
}

/// Table [`Schema`] at this `Snapshot`s version.
// TODO should this return SchemaRef?
pub fn schema(&self) -> &Schema {
self.table_configuration.schema()
}
Expand Down

0 comments on commit ca18e7f

Please sign in to comment.