Skip to content

Commit

Permalink
export_as_keyed_shard now works as a streaming function (#104)
Browse files Browse the repository at this point in the history
With this PR, export_as_keyed_shard_streaming now allows generation of a protected shard using only a reader and a writer, without needing to load the footer first.
  • Loading branch information
hoytak authored Dec 5, 2024
1 parent 129c634 commit fcdb017
Show file tree
Hide file tree
Showing 3 changed files with 218 additions and 60 deletions.
36 changes: 32 additions & 4 deletions mdb_shard/src/shard_file_handle.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ use crate::cas_structs::CASChunkSequenceHeader;
use crate::error::{MDBShardError, Result};
use crate::file_structs::{FileDataSequenceEntry, MDBFileInfo};
use crate::shard_format::MDBShardInfo;
use crate::utils::{parse_shard_filename, shard_file_name, temp_shard_file_name};
use crate::utils::{parse_shard_filename, shard_file_name, temp_shard_file_name, truncate_hash};

/// When a specific implementation of the
#[derive(Debug, Clone, Default)]
Expand Down Expand Up @@ -50,16 +50,20 @@ impl MDBShardFile {
hashed_write = HashedWrite::new(out_file);

std::io::copy(reader, &mut hashed_write)?;
hashed_write.flush()?;
}

// Get the hash
hashed_write.flush()?;
let shard_hash = hashed_write.hash();

let full_file_name = target_directory.join(shard_file_name(&shard_hash));

std::fs::rename(&temp_file_name, &full_file_name)?;

let si = MDBShardInfo::load_from_file(reader)?;

debug_assert_eq!(MDBShardInfo::load_from_file(&mut Cursor::new(&mut std::fs::read(&full_file_name)?))?, si);

Self::new(shard_hash, full_file_name, MDBShardInfo::load_from_file(reader)?)
}

Expand Down Expand Up @@ -141,7 +145,10 @@ impl MDBShardFile {
include_chunk_lookup_table,
)?;

Self::write_out_from_reader(target_directory, &mut Cursor::new(output_bytes))
let written_out = Self::write_out_from_reader(target_directory, &mut Cursor::new(output_bytes))?;
written_out.verify_shard_integrity_debug_only();

Ok(written_out)
}

#[inline]
Expand Down Expand Up @@ -255,6 +262,27 @@ impl MDBShardFile {
}
debug!("Integrity test passed for shard {:?}", &self.path);

// TODO: More parts; but this will at least succeed on the server end.
// Verify that the shard chunk lookup tables are correct.

// Read from the lookup table section.
let mut read_truncated_hashes = self.read_all_truncated_hashes().unwrap();

let mut truncated_hashes = Vec::new();

let cas_blocks = self.shard.read_all_cas_blocks_full(&mut self.get_reader().unwrap()).unwrap();

// Read from the cas blocks
let mut cas_index = 0;
for ci in cas_blocks {
for (i, chunk) in ci.chunks.iter().enumerate() {
truncated_hashes.push((truncate_hash(&chunk.chunk_hash), (cas_index as u32, i as u32)));
}
cas_index += 1 + ci.chunks.len();
}

read_truncated_hashes.sort_by_key(|s| s.0);
truncated_hashes.sort_by_key(|s| s.0);

assert_eq!(read_truncated_hashes, truncated_hashes);
}
}
17 changes: 14 additions & 3 deletions mdb_shard/src/shard_file_manager.rs
Original file line number Diff line number Diff line change
Expand Up @@ -973,8 +973,15 @@ mod tests {
continue;
}

// Do some repeat keys to make sure that path is tested as well.
let key: HMACKey = rng_hash((i % 6) as u64);
let key: HMACKey = {
if i == 1 {
// This tests that the default route with no hmac translation is solid too
HMACKey::default()
} else {
// Do some repeat keys to make sure that path is tested as well.
rng_hash((i % 6) as u64)
}
};

let shard = MDBShardFile::load_from_file(p)?;

Expand All @@ -989,7 +996,11 @@ mod tests {
include_info,
)
.unwrap();
assert_eq!(out.chunk_hmac_key(), Some(key));
if key != HMACKey::default() {
assert_eq!(out.chunk_hmac_key(), Some(key));
} else {
assert_eq!(out.chunk_hmac_key(), None);
}
}

// Now, verify that everything still works great.
Expand Down
Loading

0 comments on commit fcdb017

Please sign in to comment.