Skip to content

Commit

Permalink
Add reflink_block function (#85)
Browse files Browse the repository at this point in the history
  • Loading branch information
Vaiz authored Jan 11, 2025
1 parent c9cb2ab commit 6c937d6
Show file tree
Hide file tree
Showing 7 changed files with 524 additions and 42 deletions.
10 changes: 7 additions & 3 deletions .github/workflows/build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,7 @@ jobs:
if: ${{ matrix.os == 'windows-latest' }}
uses: samypr100/setup-dev-drive@v3
with:
drive-size: 1GB
drive-size: 16GB
drive-format: ReFS
drive-type: Dynamic
drive-path: "${{ runner.temp }}/dev-drives/refs2.vhdx"
Expand All @@ -94,11 +94,15 @@ jobs:

- name: Test
if: "! matrix.use-cross"
run: cargo test --target ${{ matrix.target }} -- --ignored
run: cargo test --target ${{ matrix.target }} -- --include-ignored --show-output
env:
RUST_BACKTRACE: 1

- name: Test using cross
if: "matrix.use-cross"
run: cross test --target ${{ matrix.target }} -- --ignored
run: cross test --target ${{ matrix.target }} -- --include-ignored --show-output
env:
RUST_BACKTRACE: 1

cross-check:
strategy:
Expand Down
35 changes: 35 additions & 0 deletions examples/reflink_block.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
use std::fs::File;
use std::num::NonZeroU64;

// cargo run --example reflink_block V:/file.bin V:/file_cow.bin 4096

/// Clones `<source_file>` into `<target_file>` one cluster-sized block at a time.
///
/// Expects exactly three command-line arguments: the source path, the target path and the
/// cluster size in bytes (a non-zero integer). With any other argument count a usage line is
/// printed to stderr and the program exits successfully.
///
/// # Errors
/// Returns an `InvalidInput` error when the cluster size cannot be parsed, and forwards any I/O
/// error raised while opening, resizing or reflinking the files.
fn main() -> std::io::Result<()> {
    let args: Vec<_> = std::env::args().collect();

    let [_, src_file, tgt_file, cluster_size] = &args[..] else {
        // argv may legitimately be empty on some platforms, so do not index `args[0]`.
        let program = args.first().map_or("reflink_block", String::as_str);
        eprintln!("Usage: {program} <source_file> <target_file> <cluster_size>");
        return Ok(());
    };

    // The cluster size is user input: report a bad value as an error instead of panicking.
    let cluster_size: NonZeroU64 = cluster_size.parse().map_err(|err| {
        std::io::Error::new(
            std::io::ErrorKind::InvalidInput,
            format!("cannot parse cluster size {cluster_size:?}: {err}"),
        )
    })?;

    let from_file = File::open(src_file)?;
    let len = from_file.metadata()?.len();
    let to_file = File::create(tgt_file)?;
    // Give the target the same length as the source before cloning blocks into it.
    to_file.set_len(len)?;

    let mut offset = 0u64;
    while offset < len {
        println!("reflink {offset}, {cluster_size}");
        // Each call clones exactly one cluster located at the same offset in both files.
        reflink_copy::ReflinkBlockBuilder::new(&from_file, &to_file, cluster_size)
            .from_offset(offset)
            .to_offset(offset)
            .cluster_size(cluster_size)
            .reflink_block()?;

        offset += cluster_size.get();
    }
    Ok(())
}
3 changes: 3 additions & 0 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
//!
//! As soon as other OSes support the functionality, support will be added.
mod reflink_block;
mod sys;

use std::fs;
Expand Down Expand Up @@ -187,3 +188,5 @@ pub enum ReflinkSupport {
/// Reflink support is unconfirmed.
Unknown,
}

pub use reflink_block::ReflinkBlockBuilder;
134 changes: 134 additions & 0 deletions src/reflink_block.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,134 @@
use crate::sys;
use std::fs::File;
use std::io;
use std::num::NonZeroU64;

/// Builder that reflinks (block-clones) a region of one file into another file.
///
/// The implementation aims to be as cheap as possible and issues no API calls beyond the ones
/// required for the clone itself. All preliminary checks and preparations are the caller's
/// responsibility.
///
/// To clone a whole file, prefer the [`reflink`] or [`reflink_or_copy`] functions.
///
/// > Note: Block cloning is currently implemented on Windows only. On every other platform
/// > [`ReflinkBlockBuilder::reflink_block`] returns `Err`.
///
/// # General restrictions
/// - The source and destination regions must begin and end at a cluster boundary.
/// - If the source and destination regions are in the same file, they must not overlap. (The
///   application may be able to proceed by splitting up the block clone operation into multiple
///   block clones that no longer overlap.)
///
/// # Windows specific restrictions and remarks
/// - The destination region must not extend past the end of file. If the application wishes to
///   extend the destination with cloned data, it must first call
///   [`File::set_len`](fn@std::fs::File::set_len).
/// - The source and destination files must be on the same ReFS volume.
/// - The source and destination files must have the same Integrity Streams setting (that is,
///   Integrity Streams must be enabled in both files, or disabled in both files).
/// - If the source file is sparse, the destination file must also be sparse.
/// - The block clone operation will break Shared Opportunistic Locks (also known as Level 2
///   Opportunistic Locks).
/// - The ReFS volume must have been formatted with Windows Server 2016, and if Windows Failover
///   Clustering is in use, the Clustering Functional Level must have been Windows Server 2016 or
///   later at format time.
///
/// > Note: Blocks larger than 4GB are handled by [`ReflinkBlockBuilder::reflink_block`] by
/// > splitting them into smaller ones. Each smaller block is 4GB minus the cluster size (64 KiB is
/// > assumed when no cluster size was set), so cloning a large block may take more than one API
/// > call.
///
/// More information about block cloning on Windows is available in the
/// [Microsoft documentation](https://learn.microsoft.com/en-us/windows/win32/fileio/block-cloning).
///
/// # Examples
///
/// The example below creates a new file that reuses the blocks of another file in swapped order.
/// ```no_run
/// use std::fs::File;
/// use std::num::NonZeroU64;
///
/// fn shuffle() -> std::io::Result<()> {
///     let from_file = File::open("source.bin")?;
///     let to_file = File::create("destination.bin")?;
///     let cluster_size = NonZeroU64::new(4096).unwrap();
///     let len = cluster_size.get() * 2;
///
///     to_file.set_len(len)?;
///
///     reflink_copy::ReflinkBlockBuilder::new(&from_file, &to_file, cluster_size)
///         .from_offset(0)
///         .to_offset(cluster_size.get())
///         .reflink_block()?;
///
///     reflink_copy::ReflinkBlockBuilder::new(&from_file, &to_file, cluster_size)
///         .from_offset(cluster_size.get())
///         .to_offset(0)
///         .reflink_block()?;
///
///     Ok(())
/// }
/// ```
/// [`reflink`]: crate::reflink
/// [`reflink_or_copy`]: crate::reflink_or_copy
#[derive(Debug)]
pub struct ReflinkBlockBuilder<'from, 'to> {
    /// File the block is cloned from.
    from: &'from File,
    /// Byte offset of the block inside the source file.
    from_offset: u64,
    /// File the block is cloned into.
    to: &'to File,
    /// Byte offset of the block inside the destination file.
    to_offset: u64,
    /// Length of the block in bytes.
    src_length: u64,
    /// Cluster size hint; `None` until [`ReflinkBlockBuilder::cluster_size`] is called.
    cluster_size: Option<NonZeroU64>,
}

impl<'from, 'to> ReflinkBlockBuilder<'from, 'to> {
    /// Creates a new instance of [`ReflinkBlockBuilder`] for a block of `src_length` bytes.
    ///
    /// Both offsets start at zero and no cluster size is set.
    pub fn new(from: &'from File, to: &'to File, src_length: NonZeroU64) -> Self {
        let src_length = src_length.get();
        Self {
            from,
            from_offset: 0,
            to,
            to_offset: 0,
            src_length,
            cluster_size: None,
        }
    }

    /// Sets the offset of the block within the source file.
    #[must_use]
    pub fn from_offset(self, from_offset: u64) -> Self {
        Self {
            from_offset,
            ..self
        }
    }

    /// Sets the offset of the block within the destination file.
    #[must_use]
    pub fn to_offset(self, to_offset: u64) -> Self {
        Self { to_offset, ..self }
    }

    /// Sets the cluster size. On Windows it determines the maximum block size of a single
    /// reflink call.
    #[must_use]
    pub fn cluster_size(self, cluster_size: NonZeroU64) -> Self {
        Self {
            cluster_size: Some(cluster_size),
            ..self
        }
    }

    /// Performs the reflink operation for the configured block of data.
    ///
    /// On non-Windows platforms this always fails with a "Not implemented" error.
    #[cfg_attr(not(windows), allow(unused_variables))]
    pub fn reflink_block(self) -> io::Result<()> {
        #[cfg(windows)]
        {
            let Self {
                from,
                from_offset,
                to,
                to_offset,
                src_length,
                cluster_size,
            } = self;
            return sys::reflink_block(from, from_offset, to, to_offset, src_length, cluster_size);
        }
        #[cfg(not(windows))]
        Err(io::Error::other("Not implemented"))
    }
}
1 change: 1 addition & 0 deletions src/sys/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ cfg_if! {
mod windows_impl;
pub use self::windows_impl::reflink;
pub use self::windows_impl::check_reflink_support;
pub(crate) use self::windows_impl::reflink_block;
} else {
pub use self::reflink_not_supported as reflink;
}
Expand Down
141 changes: 105 additions & 36 deletions src/sys/windows_impl.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
use super::utility::AutoRemovedFile;
use crate::ReflinkSupport;
use std::num::NonZeroU64;

use std::{
convert::TryInto,
Expand Down Expand Up @@ -90,44 +91,19 @@ pub fn reflink(from: &Path, to: &Path) -> io::Result<()> {
}
};

let mut bytes_copied = 0;
// Must be smaller than 4GB; This is always a multiple of ClusterSize
let max_copy_len: i64 = if cluster_size == 0 {
total_copy_len
let cluster_size = if cluster_size != 0 {
Some(NonZeroU64::new(cluster_size as u64).unwrap())
} else {
(4 * 1024 * 1024 * 1024) - cluster_size
None
};
while bytes_copied < total_copy_len {
let bytes_to_copy = total_copy_len.min(max_copy_len);
if cluster_size != 0 {
debug_assert_eq!(bytes_to_copy % cluster_size, 0);
debug_assert_eq!(bytes_copied % cluster_size, 0);
}

let mut dup_extent = DUPLICATE_EXTENTS_DATA {
FileHandle: src.as_handle(),

SourceFileOffset: bytes_copied,
TargetFileOffset: bytes_copied,
ByteCount: bytes_to_copy,
};

let mut bytes_returned = 0u32;
unsafe {
DeviceIoControl(
dest.as_handle(),
FSCTL_DUPLICATE_EXTENTS_TO_FILE,
Some(&mut dup_extent as *mut _ as *mut c_void),
mem::size_of::<DUPLICATE_EXTENTS_DATA>().try_into().unwrap(),
None,
0,
Some(&mut bytes_returned as *mut _),
None,
)
}?;
bytes_copied += bytes_to_copy;
}

reflink_block(
&src,
0,
dest.as_inner_file(),
0,
total_copy_len as u64,
cluster_size,
)?;
if !src_is_sparse {
dest.unset_sparse()?;
}
Expand Down Expand Up @@ -378,10 +354,103 @@ fn get_volume_flags(volume_path_w: &[u16]) -> io::Result<u32> {
Ok(file_system_flags)
}

/// Clones `src_length` bytes of `from`, starting at `from_offset`, into `to` at `to_offset`.
///
/// A single block-clone request must be smaller than 4 GiB, so the range is processed in chunks
/// of at most `4 GiB - cluster_size` bytes. When `cluster_size` is `None`, 64 KiB
/// (`MAX_REFS_CLUSTER_SIZE`) is subtracted instead.
///
/// # Errors
/// - `InvalidInput` if `cluster_size` is 4 GiB or larger, because no valid chunk size exists.
/// - `InvalidInput` if `from_offset + src_length` or `to_offset + src_length` overflows `u64`.
/// - Any error returned by the underlying duplicate-extents request. Chunks cloned before the
///   failure are not rolled back.
pub(crate) fn reflink_block(
    from: &File,
    from_offset: u64,
    to: &File,
    to_offset: u64,
    src_length: u64,
    cluster_size: Option<NonZeroU64>,
) -> io::Result<()> {
    const GB: u64 = 1024u64 * 1024 * 1024;
    const MAX_REFS_CLUSTER_SIZE: u64 = 64 * 1024;
    const MAX_REQUEST_SIZE: u64 = 4 * GB;

    // Must be smaller than 4GB. It stays a multiple of the cluster size as long as the cluster
    // size is a power of two. A cluster size of 4 GiB or more would underflow or produce a
    // zero-sized chunk (and therefore an endless loop), so it is rejected up front.
    let alignment = cluster_size.map_or(MAX_REFS_CLUSTER_SIZE, NonZeroU64::get);
    let max_io_size = MAX_REQUEST_SIZE
        .checked_sub(alignment)
        .filter(|&size| size != 0)
        .ok_or_else(|| {
            io::Error::new(
                io::ErrorKind::InvalidInput,
                "cluster size must be smaller than 4 GiB",
            )
        })?;

    // Validate both ranges once so the per-chunk offset additions below cannot overflow.
    if from_offset.checked_add(src_length).is_none() || to_offset.checked_add(src_length).is_none()
    {
        return Err(io::Error::new(
            io::ErrorKind::InvalidInput,
            "block range exceeds the maximum file offset",
        ));
    }

    let mut bytes_copied = 0;
    while bytes_copied < src_length {
        let bytes_to_copy = max_io_size.min(src_length - bytes_copied);
        if let Some(cluster_size) = cluster_size {
            // Callers are expected to pass cluster-aligned lengths; checked in debug builds only.
            debug_assert_eq!(bytes_to_copy % cluster_size, 0);
            debug_assert_eq!(bytes_copied % cluster_size, 0);
        }

        duplicate_extent_to_file(
            from,
            from_offset + bytes_copied,
            to,
            to_offset + bytes_copied,
            bytes_to_copy,
        )?;

        bytes_copied += bytes_to_copy;
    }

    Ok(())
}

/// Issues one `FSCTL_DUPLICATE_EXTENTS_TO_FILE` request that clones `src_length` bytes of `from`
/// at `from_offset` into `to` at `to_offset`.
///
/// # Errors
/// - `InvalidInput` if an offset or the length does not fit into the signed 64-bit fields of
///   `DUPLICATE_EXTENTS_DATA`.
/// - The error reported by `DeviceIoControl` if the request itself fails.
fn duplicate_extent_to_file(
    from: &File,
    from_offset: u64,
    to: &File,
    to_offset: u64,
    src_length: u64,
) -> io::Result<()> {
    // The request structure stores offsets and length as `i64`. A plain `as` cast would silently
    // wrap values above `i64::MAX` into negative numbers, so convert with a range check instead.
    let to_i64 = |value: u64| -> io::Result<i64> {
        let converted: Result<i64, _> = value.try_into();
        converted.map_err(|_| {
            io::Error::new(
                io::ErrorKind::InvalidInput,
                "offset or length does not fit into a signed 64-bit integer",
            )
        })
    };

    let mut dup_extent = DUPLICATE_EXTENTS_DATA {
        FileHandle: from.as_handle(),
        SourceFileOffset: to_i64(from_offset)?,
        TargetFileOffset: to_i64(to_offset)?,
        ByteCount: to_i64(src_length)?,
    };

    // Not inspected afterwards; it is only supplied as the bytes-returned out parameter.
    let mut bytes_returned = 0u32;
    // SAFETY: `dup_extent` and `bytes_returned` are live locals for the whole call, the input
    // size passed is exactly the size of `DUPLICATE_EXTENTS_DATA`, no output buffer is supplied
    // (`None`, length 0), and both handles are borrowed from `File`s that outlive the call.
    unsafe {
        DeviceIoControl(
            to.as_handle(),
            FSCTL_DUPLICATE_EXTENTS_TO_FILE,
            Some(&mut dup_extent as *mut _ as *mut c_void),
            size_of::<DUPLICATE_EXTENTS_DATA>()
                .try_into()
                .expect("size of DUPLICATE_EXTENTS_DATA fits in u32"),
            None,
            0,
            Some(&mut bytes_returned as *mut _),
            None,
        )
    }?;
    Ok(())
}

#[cfg(test)]
mod test {
use super::*;

// Checks `round_up(value, multiple)` against hand-computed results for several multiples.
#[test]
fn test_round_up() {
// Multiple of 2: zero and an exact multiple stay unchanged, 1 is rounded up to 2.
assert_eq!(round_up(0, 2), 0);
assert_eq!(round_up(1, 2), 2);
assert_eq!(round_up(2, 2), 2);

// Multiple of 8: values just below and just above 16.
assert_eq!(round_up(15, 8), 16);
assert_eq!(round_up(17, 8), 24);

// Cluster-sized multiples (4 KiB and 64 KiB).
assert_eq!(round_up(100000, 4096), 102400);
assert_eq!(round_up(100000, 65536), 131072);
}

// Expects `round_up` to panic when the multiple is zero.
#[test]
#[should_panic]
fn test_invalid_multiple_zero() {
round_up(10, 0);
}
// Expects `round_up` to panic when the multiple (here 3) is not a power of two.
#[test]
#[should_panic]
fn test_invalid_multiple_non_power_of_two() {
round_up(10, 3);
}

#[test]
fn test_get_volume_path_is_same() -> io::Result<()> {
let src_volume_path = get_volume_path("./src")?;
Expand Down
Loading

0 comments on commit 6c937d6

Please sign in to comment.