Skip to content

Commit

Permalink
WIP: Tools to compute size for a list of paths
Browse files Browse the repository at this point in the history
  • Loading branch information
sfauvel committed Feb 28, 2025
1 parent bb0edf4 commit ec610bf
Show file tree
Hide file tree
Showing 5 changed files with 175 additions and 42 deletions.
44 changes: 4 additions & 40 deletions mithril-aggregator/src/artifact_builder/cardano_database.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,4 @@
use std::{
path::{Path, PathBuf},
sync::Arc,
};
use std::{path::PathBuf, sync::Arc};

use anyhow::{anyhow, Context};
use async_trait::async_trait;
Expand All @@ -16,7 +13,9 @@ use mithril_common::{
CardanoNetwork, StdResult,
};

use crate::artifact_builder::{AncillaryArtifactBuilder, ArtifactBuilder};
use crate::artifact_builder::{
utils::compute_uncompressed_database_size, AncillaryArtifactBuilder, ArtifactBuilder,
};

use super::{DigestArtifactBuilder, ImmutableArtifactBuilder};

Expand Down Expand Up @@ -109,41 +108,6 @@ impl ArtifactBuilder<CardanoDbBeacon, CardanoDatabaseSnapshot> for CardanoDataba
}
}

// TODO: Test and fix the case where a file or directory is included in another listed path (it must not be counted twice).
// TODO: Should we externalize this tool?
/// Sum the sizes, in bytes, of every path in `paths`.
///
/// Each path is measured independently with `compute_uncompressed_database_size`:
/// no de-duplication is performed, so a path listed twice, or nested inside another
/// listed path, is counted each time it appears.
///
/// The first error encountered while measuring a path is returned as is.
pub(crate) fn compute_size(paths: Vec<PathBuf>) -> StdResult<u64> {
    paths
        .iter()
        .try_fold(0, |running_total, path_to_include| -> StdResult<u64> {
            Ok(running_total + compute_uncompressed_database_size(path_to_include)?)
        })
}

/// Recursively compute the size, in bytes, of what is stored at `path`.
///
/// * a file yields the length reported by its metadata;
/// * a directory yields the sum of the sizes of all its entries, computed recursively;
/// * anything that is neither a file nor a directory yields `0`.
///
/// Fails if the file metadata, the directory, or one of its entries can not be read.
fn compute_uncompressed_database_size(path: &Path) -> StdResult<u64> {
    if path.is_file() {
        let file_size = std::fs::metadata(path)
            .with_context(|| format!("Failed to read metadata for file: {:?}", path))?
            .len();

        return Ok(file_size);
    }

    // Neither a file nor a directory: nothing to count.
    if !path.is_dir() {
        return Ok(0);
    }

    let dir_entries = std::fs::read_dir(path)
        .with_context(|| format!("Failed to read directory: {:?}", path))?;

    let mut accumulated_size = 0;
    for dir_entry in dir_entries {
        let child_path = dir_entry
            .with_context(|| format!("Failed to read directory entry in {:?}", path))?
            .path();
        accumulated_size += compute_uncompressed_database_size(&child_path)?;
    }

    Ok(accumulated_size)
}

#[cfg(test)]
mod tests {
use std::{collections::BTreeMap, path::PathBuf};
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ use mithril_common::{
};

use crate::{
artifact_builder::cardano_database::compute_size,
artifact_builder::utils::compute_size,
file_uploaders::{GcpUploader, LocalUploader},
services::{OngoingSnapshot, Snapshotter},
DumbUploader, FileUploader,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ use mithril_common::{
};

use crate::{
artifact_builder::compute_size,
artifact_builder::utils::compute_size,
file_uploaders::{GcpUploader, LocalUploader},
services::Snapshotter,
DumbUploader, FileUploader,
Expand Down
1 change: 1 addition & 0 deletions mithril-aggregator/src/artifact_builder/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ mod cardano_stake_distribution;
mod cardano_transactions;
mod interface;
mod mithril_stake_distribution;
mod utils;

pub use cardano_database::*;
pub use cardano_database_artifacts::*;
Expand Down
168 changes: 168 additions & 0 deletions mithril-aggregator/src/artifact_builder/utils.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,168 @@
use anyhow::Context;
use std::path::{Path, PathBuf};

use mithril_common::StdResult;

pub(crate) fn compute_size(paths: Vec<PathBuf>) -> StdResult<u64> {
fn is_in_paths(paths: &Vec<PathBuf>, path_to_check: &PathBuf) -> bool {

Check warning

Code scanning / clippy

writing &Vec instead of &[_] involves a new object where a slice will do Warning

writing &Vec instead of &[\_] involves a new object where a slice will do

Check warning

Code scanning / clippy

writing &Vec instead of &[_] involves a new object where a slice will do Warning

writing &Vec instead of &[\_] involves a new object where a slice will do

Check warning

Code scanning / clippy

writing &PathBuf instead of &Path involves a new object where a slice will do Warning

writing &PathBuf instead of &Path involves a new object where a slice will do

Check warning

Code scanning / clippy

writing &PathBuf instead of &Path involves a new object where a slice will do Warning

writing &PathBuf instead of &Path involves a new object where a slice will do
paths.iter().any(|path| path_to_check.starts_with(path))
}

fn remove_duplicated_paths(paths: Vec<PathBuf>) -> Vec<PathBuf> {
let mut result_paths = vec![];
for path in paths {
if !is_in_paths(&result_paths, &path) {
result_paths.retain(|p| !p.starts_with(&path));
result_paths.push(path);
}
}
result_paths
}

let paths = remove_duplicated_paths(paths);

let mut total = 0;
for path_to_include in paths {
total += compute_uncompressed_database_size(&path_to_include)?;
}
Ok(total)
}

pub(crate) fn compute_uncompressed_database_size(path: &Path) -> StdResult<u64> {
if path.is_file() {
let metadata = std::fs::metadata(path)
.with_context(|| format!("Failed to read metadata for file: {:?}", path))?;

return Ok(metadata.len());
}

if path.is_dir() {
let entries = std::fs::read_dir(path)
.with_context(|| format!("Failed to read directory: {:?}", path))?;
let mut directory_size = 0;
for entry in entries {
let path = entry
.with_context(|| format!("Failed to read directory entry in {:?}", path))?
.path();
directory_size += compute_uncompressed_database_size(&path)?;
}

return Ok(directory_size);
}

Ok(0)
}

#[cfg(test)]
mod tests {
    use std::fs::File;
    use std::io::Write;

    use mithril_common::current_function;
    use mithril_common::test_utils::TempDir;

    use super::*;

    /// Create `filename` inside `dir`, write a short text to it and return its path.
    ///
    /// When `optional_size` is given, a line break is appended and the file length is then
    /// forced to exactly that number of bytes.
    fn write_dummy_file(optional_size: Option<u64>, dir: &Path, filename: &str) -> PathBuf {
        let file_path = dir.join(Path::new(filename));
        let mut handle = File::create(&file_path).unwrap();

        write!(handle, "This is a test file named '{filename}'").unwrap();

        if let Some(file_size) = optional_size {
            writeln!(handle).unwrap();
            handle.set_len(file_size).unwrap();
        }

        file_path
    }

    #[test]
    fn test_compute_file_size() {
        let root = TempDir::create("utils", current_function!());
        let single_file = write_dummy_file(Some(4), &root, "file");

        assert_eq!(compute_size(vec![single_file]).unwrap(), 4);
    }

    #[test]
    fn test_compute_multiple_files_size() {
        let root = TempDir::create("utils", current_function!());
        let files = vec![
            write_dummy_file(Some(4), &root, "file_1"),
            write_dummy_file(Some(7), &root, "file_2"),
        ];

        assert_eq!(compute_size(files).unwrap(), 11);
    }

    #[test]
    fn test_compute_folder_size() {
        let root = TempDir::create("utils", current_function!());
        write_dummy_file(Some(4), &root, "file_1");
        write_dummy_file(Some(7), &root, "file_2");

        assert_eq!(compute_size(vec![root]).unwrap(), 11);
    }

    #[test]
    fn test_compute_multi_folders_size() {
        let root = TempDir::create("utils", current_function!());

        // Create a sub directory of the test directory holding one file of the given size.
        let make_sub_dir = |dir_name: &str, file_size: u64, filename: &str| -> PathBuf {
            let sub_dir = root.join(dir_name);
            std::fs::create_dir(&sub_dir).unwrap();
            write_dummy_file(Some(file_size), &sub_dir, filename);
            sub_dir
        };

        let sub_dir_1 = make_sub_dir("sub_dir_1", 4, "file_1");
        let sub_dir_2 = make_sub_dir("sub_dir_2", 7, "file_2");
        // Not given to `compute_size`: its content must not be part of the result.
        make_sub_dir("sub_dir_3", 3, "file_3");

        assert_eq!(compute_size(vec![sub_dir_1, sub_dir_2]).unwrap(), 11);
    }

    #[test]
    fn test_compute_sub_folders_size() {
        let root = TempDir::create("utils", current_function!());

        let parent_dir = root.join("sub_dir_1");
        std::fs::create_dir(&parent_dir).unwrap();
        write_dummy_file(Some(4), &parent_dir, "file_1");

        let nested_dir = parent_dir.join("sub_dir_2");
        std::fs::create_dir(&nested_dir).unwrap();
        write_dummy_file(Some(7), &nested_dir, "file_2");

        assert_eq!(compute_size(vec![parent_dir]).unwrap(), 11);
    }

    #[test]
    fn test_compute_size_count_a_file_only_once() {
        let root = TempDir::create("utils", current_function!());
        let repeated_file = write_dummy_file(Some(4), &root, "file_1");

        // The very same path, listed three times.
        assert_eq!(compute_size(vec![repeated_file; 3]).unwrap(), 4);
    }

    #[test]
    fn test_compute_size_count_a_file_only_once_when_it_s_part_of_a_computed_folder() {
        let root = TempDir::create("utils", current_function!());
        let contained_file = write_dummy_file(Some(4), &root, "file_1");

        // The result must not depend on whether the folder or the file comes first.
        let orderings = [
            vec![root.clone(), contained_file.clone()],
            vec![contained_file, root],
        ];
        for paths in orderings {
            assert_eq!(compute_size(paths).unwrap(), 4);
        }
    }
}

0 comments on commit ec610bf

Please sign in to comment.