Skip to content

Commit

Permalink
Fix for empty file shard poisoning.
Browse files Browse the repository at this point in the history
  • Loading branch information
hoytak committed Oct 10, 2024
1 parent 3e40d1a commit 85c0279
Show file tree
Hide file tree
Showing 4 changed files with 33 additions and 9 deletions.
12 changes: 12 additions & 0 deletions mdb_shard/src/cas_structs.rs
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,18 @@ impl CASChunkSequenceHeader {
}
}

/// Builds the bookend header: a header whose `cas_hash` has every bit set,
/// with all other fields left at their `Default` values.
///
/// Writers emit this entry after the last real header as a guard for
/// sequential reading; see [`Self::is_bookend`] for the matching check.
pub fn bookend() -> Self {
    // Four u64 words of all 1 bits form the sentinel hash.
    let all_ones = [u64::MAX; 4];
    Self {
        cas_hash: all_ones.into(),
        ..Default::default()
    }
}

pub fn is_bookend(&self) -> bool {
self.cas_hash == [!0u64; 4].into()
}

pub fn new_with_compression<I1: TryInto<u32>, I2: TryInto<u32> + Copy>(
cas_hash: MerkleHash,
num_entries: I1,
Expand Down
12 changes: 12 additions & 0 deletions mdb_shard/src/file_structs.rs
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,18 @@ impl FileDataSequenceHeader {
}
}

/// Builds the bookend header: a header whose `file_hash` has every bit set,
/// with all other fields left at their `Default` values.
///
/// Writers emit this entry after the last real header as a guard for
/// sequential reading; see [`Self::is_bookend`] for the matching check.
pub fn bookend() -> Self {
    // Four u64 words of all 1 bits form the sentinel hash.
    let all_ones = [u64::MAX; 4];
    Self {
        file_hash: all_ones.into(),
        ..Default::default()
    }
}

pub fn is_bookend(&self) -> bool {
self.file_hash == [!0u64; 4].into()
}

pub fn serialize<W: Write>(&self, writer: &mut W) -> Result<usize, std::io::Error> {
let mut buf = [0u8; size_of::<Self>()];
{
Expand Down
8 changes: 4 additions & 4 deletions mdb_shard/src/set_operations.rs
Original file line number Diff line number Diff line change
Expand Up @@ -95,7 +95,7 @@ fn set_operation<R: Read + Seek, W: Write>(

let load_next = |_r: &mut R, _s: &MDBShardInfo| -> Result<_> {
let fdsh = FileDataSequenceHeader::deserialize(_r)?;
if fdsh.file_hash == MerkleHash::default() {
if fdsh.is_bookend() {
Ok(None)
} else {
Ok(Some(fdsh))
Expand Down Expand Up @@ -142,7 +142,7 @@ fn set_operation<R: Read + Seek, W: Write>(
};
}
}
out_offset += FileDataSequenceHeader::default().serialize(out)? as u64;
out_offset += FileDataSequenceHeader::bookend().serialize(out)? as u64;

footer.file_lookup_offset = out_offset;
footer.file_lookup_num_entry = file_lookup_data.len() as u64;
Expand Down Expand Up @@ -172,7 +172,7 @@ fn set_operation<R: Read + Seek, W: Write>(

let load_next = |_r: &mut R, _s: &MDBShardInfo| -> Result<_> {
let ccsh = CASChunkSequenceHeader::deserialize(_r)?;
if ccsh.cas_hash == MerkleHash::default() {
if ccsh.is_bookend() {
Ok(None)
} else {
Ok(Some(ccsh))
Expand Down Expand Up @@ -220,7 +220,7 @@ fn set_operation<R: Read + Seek, W: Write>(
}
}

out_offset += CASChunkSequenceHeader::default().serialize(out)? as u64;
out_offset += CASChunkSequenceHeader::bookend().serialize(out)? as u64;

// Write out the cas and chunk lookup sections.
footer.cas_lookup_offset = out_offset;
Expand Down
10 changes: 5 additions & 5 deletions mdb_shard/src/shard_format.rs
Original file line number Diff line number Diff line change
Expand Up @@ -386,7 +386,7 @@ impl MDBShardInfo {
}

// Serialize a single bookend entry as a guard for sequential reading.
bytes_written += FileDataSequenceHeader::default().serialize(writer)?;
bytes_written += FileDataSequenceHeader::bookend().serialize(writer)?;

// No need to sort because BTreeMap is ordered and we truncate by the first 8 bytes.
Ok(((file_lookup_keys, file_lookup_vals), bytes_written))
Expand Down Expand Up @@ -428,8 +428,8 @@ impl MDBShardInfo {
index += 1 + content.chunks.len() as u32;
}

// Serialize a single block of 00 bytes as a guard for sequential reading.
bytes_written += CASChunkSequenceHeader::default().serialize(writer)?;
// Serialize a single bookend entry as a guard for sequential reading.
bytes_written += CASChunkSequenceHeader::bookend().serialize(writer)?;

// No need to sort cas_lookup_ because BTreeMap is ordered and we truncate by the first 8 bytes.

Expand Down Expand Up @@ -901,7 +901,7 @@ impl MDBShardInfo {
loop {
let header = FileDataSequenceHeader::deserialize(reader)?;

if header.file_hash == MerkleHash::default() {
if header.is_bookend() {
break;
}

Expand Down Expand Up @@ -1000,7 +1000,7 @@ impl MDBShardInfo {
out_footer.file_info_offset = self.metadata.file_info_offset;

// Serialize a single bookend entry as a guard for sequential reading.
byte_pos += FileDataSequenceHeader::default().serialize(writer)?;
byte_pos += FileDataSequenceHeader::bookend().serialize(writer)?;

out_footer.file_lookup_offset = byte_pos as u64;
out_footer.file_lookup_num_entry = 0;
Expand Down

0 comments on commit 85c0279

Please sign in to comment.