Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Engines now return FileMeta with correct millisecond timestamps #565

Merged
30 changes: 29 additions & 1 deletion kernel/src/engine/default/filesystem.rs
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@ impl<E: TaskExecutor> FileSystemClient for ObjectStoreFileSystemClient<E> {
sender
.send(Ok(FileMeta {
location,
last_modified: meta.last_modified.timestamp(),
last_modified: meta.last_modified.timestamp_millis(),
size: meta.size,
}))
.ok();
Expand Down Expand Up @@ -155,7 +155,9 @@ impl<E: TaskExecutor> FileSystemClient for ObjectStoreFileSystemClient<E> {
#[cfg(test)]
mod tests {
use std::ops::Range;
use std::time::{Duration, SystemTime, UNIX_EPOCH};

use object_store::memory::InMemory;
use object_store::{local::LocalFileSystem, ObjectStore};

use test_utils::delta_path_for_version;
Expand Down Expand Up @@ -216,6 +218,32 @@ mod tests {
assert_eq!(data[2], Bytes::from("el-da"));
}

#[tokio::test]
async fn test_file_meta_is_correct() {
let store = Arc::new(InMemory::new());

let begin_time = SystemTime::now().duration_since(UNIX_EPOCH).unwrap();

let data = Bytes::from("kernel-data");
let name = delta_path_for_version(1, "json");
store.put(&name, data.clone().into()).await.unwrap();

let table_root = Url::parse("memory:///").expect("valid url");
let prefix = Path::from_url_path(table_root.path()).expect("Couldn't get path");
let engine = DefaultEngine::new(store, prefix, Arc::new(TokioBackgroundExecutor::new()));
let files: Vec<_> = engine
.get_file_system_client()
.list_from(&table_root)
.unwrap()
.try_collect()
.unwrap();

assert!(!files.is_empty());
for meta in files.into_iter() {
let meta_time = Duration::from_millis(meta.last_modified.try_into().unwrap());
assert!(meta_time.abs_diff(begin_time) < Duration::from_secs(10));
}
}
#[tokio::test]
async fn test_default_engine_listing() {
let tmp = tempfile::tempdir().unwrap();
Expand Down
51 changes: 28 additions & 23 deletions kernel/src/engine/sync/fs_client.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,3 @@
use std::time::SystemTime;

use bytes::Bytes;
use itertools::Itertools;
use url::Url;
Expand Down Expand Up @@ -49,26 +47,7 @@ impl FileSystemClient for SyncFilesystemClient {
let it = all_ents
.into_iter()
.sorted_by_key(|ent| ent.path())
.map(|ent| {
ent.metadata().map_err(Error::IOError).and_then(|metadata| {
let last_modified: u64 = metadata
.modified()
.map(
|modified| match modified.duration_since(SystemTime::UNIX_EPOCH) {
Ok(d) => d.as_secs(),
Err(_) => 0,
},
)
.unwrap_or(0);
Url::from_file_path(ent.path())
.map(|location| FileMeta {
location,
last_modified: last_modified as i64,
size: metadata.len() as usize,
})
.map_err(|_| Error::Generic(format!("Invalid path: {:?}", ent.path())))
})
});
.map(TryFrom::try_from);
Ok(Box::new(it))
} else {
Err(Error::generic("Can only read local filesystem"))
Expand Down Expand Up @@ -97,10 +76,12 @@ impl FileSystemClient for SyncFilesystemClient {

#[cfg(test)]
mod tests {
use std::fs::File;
use std::io::Write;
use std::time::{Duration, SystemTime};
use std::{fs::File, time::UNIX_EPOCH};

use bytes::{BufMut, BytesMut};
use itertools::Itertools;
use url::Url;

use super::SyncFilesystemClient;
Expand All @@ -111,6 +92,30 @@ mod tests {
format!("{index:020}.json")
}

#[test]
fn test_file_meta_is_correct() -> Result<(), Box<dyn std::error::Error>> {
let client = SyncFilesystemClient;
let tmp_dir = tempfile::tempdir().unwrap();

let begin_time = SystemTime::now().duration_since(UNIX_EPOCH)?;

let path = tmp_dir.path().join(get_json_filename(1));
let mut f = File::create(path)?;
writeln!(f, "null")?;
f.flush()?;

let url_path = tmp_dir.path().join(get_json_filename(1));
let url = Url::from_file_path(url_path).unwrap();
let files: Vec<_> = client.list_from(&url)?.try_collect()?;

assert!(!files.is_empty());
for meta in files.iter() {
let meta_time = Duration::from_millis(meta.last_modified.try_into()?);
assert!(meta_time.abs_diff(begin_time) < Duration::from_secs(10));
}
Ok(())
}

#[test]
fn test_list_from() -> Result<(), Box<dyn std::error::Error>> {
let client = SyncFilesystemClient;
Expand Down
27 changes: 27 additions & 0 deletions kernel/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,9 @@
)]

use std::any::Any;
use std::fs::DirEntry;
use std::sync::Arc;
use std::time::SystemTime;
use std::{cmp::Ordering, ops::Range};

use bytes::Bytes;
Expand Down Expand Up @@ -142,6 +144,31 @@ impl PartialOrd for FileMeta {
}
}

impl TryFrom<DirEntry> for FileMeta {
type Error = Error;

fn try_from(ent: DirEntry) -> DeltaResult<FileMeta> {
let metadata = ent.metadata()?;
let last_modified = metadata
.modified()?
.duration_since(SystemTime::UNIX_EPOCH)
.map_err(|_| Error::generic("Failed to convert file timestamp to milliseconds"))?;
let location = Url::from_file_path(ent.path())
.map_err(|_| Error::generic(format!("Invalid path: {:?}", ent.path())))?;
let last_modified = last_modified.as_millis().try_into().map_err(|_| {
Error::generic(format!(
"Failed to convert file modification time {:?} into i64",
last_modified.as_millis()
))
})?;
Ok(FileMeta {
location,
last_modified,
size: metadata.len() as usize,
})
}
}

impl FileMeta {
/// Create a new instance of `FileMeta`
pub fn new(location: Url, last_modified: i64, size: usize) -> Self {
Expand Down
Loading