Skip to content

Commit

Permalink
Merge branch 'main' into fix-allow-pickling
Browse files Browse the repository at this point in the history
  • Loading branch information
mpiannucci authored Feb 28, 2025
2 parents c8127c2 + 6eb39af commit cc26e3d
Show file tree
Hide file tree
Showing 8 changed files with 159 additions and 20 deletions.
54 changes: 54 additions & 0 deletions .github/workflows/windows-check.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
name: Windows tests
on:
pull_request:
types: [opened, reopened, synchronize, labeled]
push:
branches:
- main

env:
CARGO_INCREMENTAL: 0
CARGO_NET_RETRY: 10
CI: 1
RUST_BACKTRACE: short
RUSTFLAGS: "-D warnings -W unreachable-pub -W bare-trait-objects"
RUSTUP_MAX_RETRIES: 10

jobs:
rust:
name: Run rust tests in windows
timeout-minutes: 20
runs-on: windows-latest
#defaults:
# run:
# working-directory: ./
#permissions:
#contents: read
#actions: read
#pull-requests: read
env:
#CC: deny_c
RUST_CHANNEL: 'stable'


steps:
- name: Checkout repository
uses: actions/checkout@v4
with:
ref: ${{ github.event.pull_request.head.sha }}

- name: Install Rust toolchain
run: |
rustup update --no-self-update ${{ env.RUST_CHANNEL }}
rustup component add --toolchain ${{ env.RUST_CHANNEL }} rustfmt rust-src clippy
rustup default ${{ env.RUST_CHANNEL }}
- name: Cache Dependencies
uses: Swatinem/rust-cache@v2
with:
# workspaces: "rust -> target"
key: windows-${{ env.RUST_CHANNEL }}

- name: Check
run: |
cargo test --lib
4 changes: 2 additions & 2 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

6 changes: 6 additions & 0 deletions Changelog.python.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,11 @@
# Changelog

### Python Icechunk Library 0.2.4

### Fixes

- Fixes a bug where object storage paths were incorrectly formatted when using Windows.

## Python Icechunk Library 0.2.3

### Features
Expand Down
4 changes: 2 additions & 2 deletions icechunk-python/Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "icechunk-python"
version = "0.2.3"
version = "0.2.4"
description = "Transactional storage engine for Zarr designed for use on cloud object storage"
readme = "../README.md"
repository = "https://github.com/earth-mover/icechunk"
Expand All @@ -21,7 +21,7 @@ crate-type = ["cdylib"]
bytes = "1.9.0"
chrono = { version = "0.4.39" }
futures = "0.3.31"
icechunk = { path = "../icechunk", version = "0.2.3", features = ["logs"] }
icechunk = { path = "../icechunk", version = "0.2.4", features = ["logs"] }
itertools = "0.14.0"
pyo3 = { version = "0.23", features = [
"chrono",
Expand Down
2 changes: 1 addition & 1 deletion icechunk/Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "icechunk"
version = "0.2.3"
version = "0.2.4"
description = "Transactional storage engine for Zarr designed for use on cloud object storage"
readme = "../README.md"
repository = "https://github.com/earth-mover/icechunk"
Expand Down
19 changes: 18 additions & 1 deletion icechunk/src/storage/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -712,10 +712,27 @@ pub async fn new_gcs_storage(
#[allow(clippy::unwrap_used, clippy::panic)]
mod tests {

use std::collections::HashSet;
use std::{collections::HashSet, fs::File, io::Write, path::PathBuf};

use super::*;
use proptest::prelude::*;
use tempfile::TempDir;

#[tokio::test]
async fn test_is_clean() {
let repo_dir = TempDir::new().unwrap();
let s = new_local_filesystem_storage(repo_dir.path()).await.unwrap();
assert!(s.root_is_clean().await.unwrap());

let mut file = File::create(repo_dir.path().join("foo.txt")).unwrap();
write!(file, "hello").unwrap();
assert!(!s.root_is_clean().await.unwrap());

let inside_existing =
PathBuf::from_iter([repo_dir.path().as_os_str().to_str().unwrap(), "foo"]);
let s = new_local_filesystem_storage(&inside_existing).await.unwrap();
assert!(s.root_is_clean().await.unwrap());
}

proptest! {
#![proptest_config(ProptestConfig {
Expand Down
33 changes: 32 additions & 1 deletion icechunk/src/storage/object_store.rs
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,6 @@ impl ObjectStorage {
Arc::new(LocalFileSystemObjectStoreBackend { path: prefix.to_path_buf() });
let client = backend.mk_object_store().await?;
let storage = ObjectStorage { backend, client: OnceCell::new_with(Some(client)) };

Ok(storage)
}

Expand Down Expand Up @@ -1022,6 +1021,8 @@ mod tests {

use tempfile::TempDir;

use crate::format::{ChunkId, ManifestId, SnapshotId};

use super::ObjectStorage;

#[tokio::test]
Expand Down Expand Up @@ -1071,4 +1072,34 @@ mod tests {
ObjectStorage::new_local_filesystem(PathBuf::from(&rel_path).as_path()).await;
assert!(store.is_ok());
}

#[tokio::test]
async fn test_object_store_paths() {
let store = ObjectStorage::new_local_filesystem(PathBuf::from(".").as_path())
.await
.unwrap();

let ref_key = "ref_key";
let ref_path = store.ref_key(ref_key);
assert_eq!(ref_path.to_string(), format!("refs/{ref_key}"));

let snapshot_id = SnapshotId::random();
let snapshot_path = store.get_snapshot_path(&snapshot_id);
assert_eq!(snapshot_path.to_string(), format!("snapshots/{snapshot_id}"));

let manifest_id = ManifestId::random();
let manifest_path = store.get_manifest_path(&manifest_id);
assert_eq!(manifest_path.to_string(), format!("manifests/{manifest_id}"));

let chunk_id = ChunkId::random();
let chunk_path = store.get_chunk_path(&chunk_id);
assert_eq!(chunk_path.to_string(), format!("chunks/{chunk_id}"));

let transaction_id = SnapshotId::random();
let transaction_path = store.get_transaction_path(&transaction_id);
assert_eq!(
transaction_path.to_string(),
format!("transactions/{transaction_id}")
);
}
}
57 changes: 44 additions & 13 deletions icechunk/src/storage/s3.rs
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,6 @@ use aws_sdk_s3::{
use aws_smithy_types_convert::{date_time::DateTimeExt, stream::PaginationStreamExt};
use bytes::{Buf, Bytes};
use chrono::{DateTime, Utc};
use err_into::ErrorInto as _;
use futures::{
stream::{self, BoxStream},
StreamExt, TryStreamExt,
Expand Down Expand Up @@ -150,10 +149,10 @@ impl S3Storage {

fn get_path_str(&self, file_prefix: &str, id: &str) -> StorageResult<String> {
let path = PathBuf::from_iter([self.prefix.as_str(), file_prefix, id]);
path.into_os_string()
.into_string()
.map_err(StorageErrorKind::BadPrefix)
.err_into()
let path_str =
path.into_os_string().into_string().map_err(StorageErrorKind::BadPrefix)?;

Ok(path_str.replace("\\", "/"))
}

fn get_path<const SIZE: usize, T: FileTypeTag>(
Expand Down Expand Up @@ -187,10 +186,10 @@ impl S3Storage {

fn ref_key(&self, ref_key: &str) -> StorageResult<String> {
let path = PathBuf::from_iter([self.prefix.as_str(), REF_PREFIX, ref_key]);
path.into_os_string()
.into_string()
.map_err(StorageErrorKind::BadPrefix)
.err_into()
let path_str =
path.into_os_string().into_string().map_err(StorageErrorKind::BadPrefix)?;

Ok(path_str.replace("\\", "/"))
}

async fn get_object_reader(
Expand Down Expand Up @@ -612,10 +611,7 @@ impl Storage for S3Storage {
_settings: &Settings,
prefix: &str,
) -> StorageResult<BoxStream<'a, StorageResult<ListInfo<String>>>> {
let prefix = PathBuf::from_iter([self.prefix.as_str(), prefix])
.into_os_string()
.into_string()
.map_err(StorageErrorKind::BadPrefix)?;
let prefix = format!("{}/{}", self.prefix, prefix).replace("//", "/");
let stream = self
.get_client()
.await
Expand Down Expand Up @@ -798,4 +794,39 @@ mod tests {
let deserialized: S3Storage = serde_json::from_str(&serialized).unwrap();
assert_eq!(storage.config, deserialized.config);
}

#[tokio::test]
async fn test_s3_paths() {
let storage = S3Storage::new(
S3Options {
region: Some("us-west-2".to_string()),
endpoint_url: None,
allow_http: true,
anonymous: false,
},
"bucket".to_string(),
Some("prefix".to_string()),
S3Credentials::FromEnv,
)
.unwrap();

let ref_path = storage.ref_key("ref_key").unwrap();
assert_eq!(ref_path, "prefix/refs/ref_key");

let snapshot_id = SnapshotId::random();
let snapshot_path = storage.get_snapshot_path(&snapshot_id).unwrap();
assert_eq!(snapshot_path, format!("prefix/snapshots/{snapshot_id}"));

let manifest_id = ManifestId::random();
let manifest_path = storage.get_manifest_path(&manifest_id).unwrap();
assert_eq!(manifest_path, format!("prefix/manifests/{manifest_id}"));

let chunk_id = ChunkId::random();
let chunk_path = storage.get_chunk_path(&chunk_id).unwrap();
assert_eq!(chunk_path, format!("prefix/chunks/{chunk_id}"));

let transaction_id = SnapshotId::random();
let transaction_path = storage.get_transaction_path(&transaction_id).unwrap();
assert_eq!(transaction_path, format!("prefix/transactions/{transaction_id}"));
}
}

0 comments on commit cc26e3d

Please sign in to comment.