diff --git a/mithril-aggregator/src/artifact_builder/cardano_database.rs b/mithril-aggregator/src/artifact_builder/cardano_database.rs index 47e2f74448..38dc91e94d 100644 --- a/mithril-aggregator/src/artifact_builder/cardano_database.rs +++ b/mithril-aggregator/src/artifact_builder/cardano_database.rs @@ -1,7 +1,4 @@ -use std::{ - path::{Path, PathBuf}, - sync::Arc, -}; +use std::{path::PathBuf, sync::Arc}; use anyhow::{anyhow, Context}; use async_trait::async_trait; @@ -16,7 +13,9 @@ use mithril_common::{ CardanoNetwork, StdResult, }; -use crate::artifact_builder::{AncillaryArtifactBuilder, ArtifactBuilder}; +use crate::artifact_builder::{ + utils::compute_uncompressed_database_size, AncillaryArtifactBuilder, ArtifactBuilder, +}; use super::{DigestArtifactBuilder, ImmutableArtifactBuilder}; @@ -109,41 +108,6 @@ impl ArtifactBuilder for CardanoDataba } } -// TODO Need to test and fix when there is files or directories include in another one (do not count twice) -// TODO should we externalize this tool ? -pub(crate) fn compute_size(paths: Vec) -> StdResult { - let mut total = 0; - for path_to_include in paths { - total += compute_uncompressed_database_size(&path_to_include)?; - } - Ok(total) -} - -fn compute_uncompressed_database_size(path: &Path) -> StdResult { - if path.is_file() { - let metadata = std::fs::metadata(path) - .with_context(|| format!("Failed to read metadata for file: {:?}", path))?; - - return Ok(metadata.len()); - } - - if path.is_dir() { - let entries = std::fs::read_dir(path) - .with_context(|| format!("Failed to read directory: {:?}", path))?; - let mut directory_size = 0; - for entry in entries { - let path = entry - .with_context(|| format!("Failed to read directory entry in {:?}", path))? 
- .path(); - directory_size += compute_uncompressed_database_size(&path)?; - } - - return Ok(directory_size); - } - - Ok(0) -} - #[cfg(test)] mod tests { use std::{collections::BTreeMap, path::PathBuf}; diff --git a/mithril-aggregator/src/artifact_builder/cardano_database_artifacts/ancillary.rs b/mithril-aggregator/src/artifact_builder/cardano_database_artifacts/ancillary.rs index 4189059f88..2b7b6e3841 100644 --- a/mithril-aggregator/src/artifact_builder/cardano_database_artifacts/ancillary.rs +++ b/mithril-aggregator/src/artifact_builder/cardano_database_artifacts/ancillary.rs @@ -15,7 +15,7 @@ use mithril_common::{ }; use crate::{ - artifact_builder::cardano_database::compute_size, + artifact_builder::utils::compute_size, file_uploaders::{GcpUploader, LocalUploader}, services::{OngoingSnapshot, Snapshotter}, DumbUploader, FileUploader, diff --git a/mithril-aggregator/src/artifact_builder/cardano_database_artifacts/immutable.rs b/mithril-aggregator/src/artifact_builder/cardano_database_artifacts/immutable.rs index 2102babcfa..d48507f2f1 100644 --- a/mithril-aggregator/src/artifact_builder/cardano_database_artifacts/immutable.rs +++ b/mithril-aggregator/src/artifact_builder/cardano_database_artifacts/immutable.rs @@ -17,7 +17,7 @@ use mithril_common::{ }; use crate::{ - artifact_builder::compute_size, + artifact_builder::utils::compute_size, file_uploaders::{GcpUploader, LocalUploader}, services::Snapshotter, DumbUploader, FileUploader, diff --git a/mithril-aggregator/src/artifact_builder/mod.rs b/mithril-aggregator/src/artifact_builder/mod.rs index c3aaffadf8..5f5c6210ab 100644 --- a/mithril-aggregator/src/artifact_builder/mod.rs +++ b/mithril-aggregator/src/artifact_builder/mod.rs @@ -6,6 +6,7 @@ mod cardano_stake_distribution; mod cardano_transactions; mod interface; mod mithril_stake_distribution; +mod utils; pub use cardano_database::*; pub use cardano_database_artifacts::*; diff --git a/mithril-aggregator/src/artifact_builder/utils.rs 
b/mithril-aggregator/src/artifact_builder/utils.rs
new file mode 100644
index 0000000000..86e2dab4f8
--- /dev/null
+++ b/mithril-aggregator/src/artifact_builder/utils.rs
@@ -0,0 +1,192 @@
+use anyhow::Context;
+use std::path::{Path, PathBuf};
+
+use mithril_common::StdResult;
+
+/// Compute the cumulated size, in bytes, of the given files and directories.
+///
+/// Directories are walked recursively. A path that is equal to, or located under, another path
+/// of the list is counted only once. The detection relies on [`Path::starts_with`], a
+/// component-wise comparison that does not access the filesystem: two different spellings of
+/// the same location (relative vs absolute, `..` segments, symbolic links) are not recognized
+/// as duplicates.
+///
+/// # Errors
+///
+/// Returns an error if the metadata of a file or the content of a directory can not be read.
+pub(crate) fn compute_size(paths: Vec<PathBuf>) -> StdResult<u64> {
+    /// Tell if `path_to_check` is equal to, or nested under, one of the given `paths`.
+    fn is_in_paths(paths: &[PathBuf], path_to_check: &Path) -> bool {
+        paths.iter().any(|path| path_to_check.starts_with(path))
+    }
+
+    /// Keep only the paths that are not already covered by another path of the list.
+    fn remove_duplicated_paths(paths: Vec<PathBuf>) -> Vec<PathBuf> {
+        let mut result_paths: Vec<PathBuf> = Vec::new();
+        for path in paths {
+            if !is_in_paths(&result_paths, &path) {
+                // The new path may be an ancestor of paths kept so far: drop them since
+                // they will be counted when walking the new path.
+                result_paths.retain(|p| !p.starts_with(&path));
+                result_paths.push(path);
+            }
+        }
+        result_paths
+    }
+
+    let paths = remove_duplicated_paths(paths);
+
+    let mut total = 0;
+    for path_to_include in paths {
+        total += compute_uncompressed_database_size(&path_to_include)?;
+    }
+    Ok(total)
+}
+
+/// Compute the size, in bytes, of a file or, recursively, of the content of a directory.
+///
+/// A path that is neither a file nor a directory (e.g. a path that does not exist) has a
+/// size of `0`.
+///
+/// # Errors
+///
+/// Returns an error if the metadata of a file or the content of a directory can not be read.
+pub(crate) fn compute_uncompressed_database_size(path: &Path) -> StdResult<u64> {
+    if path.is_file() {
+        let metadata = std::fs::metadata(path)
+            .with_context(|| format!("Failed to read metadata for file: {:?}", path))?;
+
+        return Ok(metadata.len());
+    }
+
+    if path.is_dir() {
+        let entries = std::fs::read_dir(path)
+            .with_context(|| format!("Failed to read directory: {:?}", path))?;
+        let mut directory_size = 0;
+        for entry in entries {
+            let path = entry
+                .with_context(|| format!("Failed to read directory entry in {:?}", path))?
+                .path();
+            directory_size += compute_uncompressed_database_size(&path)?;
+        }
+
+        return Ok(directory_size);
+    }
+
+    Ok(0)
+}
+
+#[cfg(test)]
+mod tests {
+    use std::fs::File;
+    use std::io::Write;
+
+    use mithril_common::current_function;
+    use mithril_common::test_utils::TempDir;
+
+    use super::*;
+
+    /// Create a file with the given name in the given dir, write some text to it, and then
+    /// return its path.
+    fn write_dummy_file(optional_size: Option<u64>, dir: &Path, filename: &str) -> PathBuf {
+        let file = dir.join(Path::new(filename));
+        let mut source_file = File::create(&file).unwrap();
+
+        write!(source_file, "This is a test file named '{filename}'").unwrap();
+
+        if let Some(file_size) = optional_size {
+            writeln!(source_file).unwrap();
+            // Truncate or zero-extend the file so it is exactly `file_size` bytes long.
+            source_file.set_len(file_size).unwrap();
+        }
+
+        file
+    }
+
+    #[test]
+    fn test_compute_file_size() {
+        let test_dir = TempDir::create("utils", current_function!());
+        let file_path = write_dummy_file(Some(4), &test_dir, "file");
+
+        let size = compute_size(vec![file_path]).unwrap();
+        assert_eq!(size, 4);
+    }
+
+    #[test]
+    fn test_compute_multiple_files_size() {
+        let test_dir = TempDir::create("utils", current_function!());
+        let file_path_1 = write_dummy_file(Some(4), &test_dir, "file_1");
+        let file_path_2 = write_dummy_file(Some(7), &test_dir, "file_2");
+
+        let size = compute_size(vec![file_path_1, file_path_2]).unwrap();
+        assert_eq!(size, 11);
+    }
+
+    #[test]
+    fn test_compute_folder_size() {
+        let test_dir = TempDir::create("utils", current_function!());
+        write_dummy_file(Some(4), &test_dir, "file_1");
+        write_dummy_file(Some(7), &test_dir, "file_2");
+
+        let size = compute_size(vec![test_dir]).unwrap();
+        assert_eq!(size, 11);
+    }
+
+    #[test]
+    fn test_compute_multi_folders_size() {
+        let test_dir = TempDir::create("utils", current_function!());
+
+        let sub_dir_1 = test_dir.join("sub_dir_1");
+        std::fs::create_dir(&sub_dir_1).unwrap();
+        write_dummy_file(Some(4), &sub_dir_1, "file_1");
+
+        let sub_dir_2 = test_dir.join("sub_dir_2");
+        std::fs::create_dir(&sub_dir_2).unwrap();
+        write_dummy_file(Some(7), &sub_dir_2, "file_2");
+
+        let sub_dir_3 = test_dir.join("sub_dir_3");
+        std::fs::create_dir(&sub_dir_3).unwrap();
+        write_dummy_file(Some(3), &sub_dir_3, "file_3");
+
+        let size = compute_size(vec![sub_dir_1, sub_dir_2]).unwrap();
+        assert_eq!(size, 11);
+    }
+
+    #[test]
+    fn test_compute_sub_folders_size() {
+        let test_dir = TempDir::create("utils", current_function!());
+
+        let sub_dir_1 = test_dir.join("sub_dir_1");
+        std::fs::create_dir(&sub_dir_1).unwrap();
+        write_dummy_file(Some(4), &sub_dir_1, "file_1");
+
+        let sub_dir_2 = sub_dir_1.join("sub_dir_2");
+        std::fs::create_dir(&sub_dir_2).unwrap();
+        write_dummy_file(Some(7), &sub_dir_2, "file_2");
+
+        let size = compute_size(vec![sub_dir_1]).unwrap();
+        assert_eq!(size, 11);
+    }
+
+    #[test]
+    fn test_compute_size_count_a_file_only_once() {
+        let test_dir = TempDir::create("utils", current_function!());
+        let file_path_1 = write_dummy_file(Some(4), &test_dir, "file_1");
+
+        let size =
+            compute_size(vec![file_path_1.clone(), file_path_1.clone(), file_path_1]).unwrap();
+        assert_eq!(size, 4);
+    }
+
+    #[test]
+    fn test_compute_size_count_a_file_only_once_when_it_s_part_of_a_computed_folder() {
+        let test_dir = TempDir::create("utils", current_function!());
+        let file_path_1 = write_dummy_file(Some(4), &test_dir, "file_1");
+
+        let size = compute_size(vec![test_dir.clone(), file_path_1.clone()]).unwrap();
+        assert_eq!(size, 4);
+
+        let size = compute_size(vec![file_path_1, test_dir]).unwrap();
+        assert_eq!(size, 4);
+    }
+}