diff --git a/CHANGELOG.md b/CHANGELOG.md index 42f89a8d..6806b89a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -17,6 +17,8 @@ Bob versions changelog - Make iouring optional (#567) #### Fixed +- Do not propagate an initialization error when it is determined that the disk is not available (#807) +- Fix crash in hardware metrics when disk is not available (#812) - Fix memory leak due to prometheus lib (#788) #### Updated diff --git a/bob-backend/src/pearl/disk_controller.rs b/bob-backend/src/pearl/disk_controller.rs index 7646baba..47105c3b 100644 --- a/bob-backend/src/pearl/disk_controller.rs +++ b/bob-backend/src/pearl/disk_controller.rs @@ -305,7 +305,7 @@ impl DiskController { .cloned() } - async fn get_or_create_pearl(&self, operation: &Operation) -> BackendResult<Group> { + async fn get_or_create_alien_pearl(&self, operation: &Operation) -> BackendResult<Group> { trace!("try get alien pearl, operation {:?}", operation); // block for read lock: it should be dropped before write lock is acquired (otherwise - deadlock) { @@ -361,7 +361,7 @@ impl DiskController { } async fn groups_run_initialized(&self, pp: impl Hooks) -> AnyResult<()> { - let res = { + let mut res = { let _permit = self.run_sem.acquire().await.expect("Semaphore is closed"); Self::run_groups(self.groups.clone(), pp).await }; @@ -369,6 +369,7 @@ impl DiskController { error!("Can't run groups on disk {:?} (reason: {})", self.disk, e); if !Self::is_work_dir_available(self.disk.path()) { self.change_state(GroupsState::NotReady).await; + res = Ok(()); } } else { self.change_state(GroupsState::Ready).await; @@ -384,7 +385,7 @@ impl DiskController { data: &BobData, ) -> Result<(), Error> { if *self.state.read().await == GroupsState::Ready { - let vdisk_group = self.get_or_create_pearl(&op).await; + let vdisk_group = self.get_or_create_alien_pearl(&op).await; match vdisk_group { Ok(group) => match group.put(key, data, StartTimestampConfig::new(false)).await { Err(e) => Err(self.process_error(e).await), @@ -569,7 +570,7 @@ impl DiskController { } } else { // we should create group only when force_delete == true - match self.get_or_create_pearl(&op).await { + match self.get_or_create_alien_pearl(&op).await { Ok(group) => group, Err(err) => { error!( diff --git a/bob-backend/src/pearl/group.rs b/bob-backend/src/pearl/group.rs index b092883d..59e0d946 100644 --- a/bob-backend/src/pearl/group.rs +++ b/bob-backend/src/pearl/group.rs @@ -71,7 +71,7 @@ impl Group { let config = self.settings.config(); let new_holders = config .try_multiple_times_async( - || self.read_vdisk_directory(), + || self.init_and_read_vdisk_directory(), "can't create pearl holders", self.settings.config().fail_retry_timeout(), ) @@ -567,7 +567,7 @@ impl Group { self.create_pearl_holder(start_timestamp, &hash) } - pub async fn read_vdisk_directory(&self) -> BackendResult<Vec<Holder>> { + pub async fn init_and_read_vdisk_directory(&self) -> BackendResult<Vec<Holder>> { Utils::check_or_create_directory(&self.directory_path).await?; let mut holders = vec![]; diff --git a/bob-backend/src/pearl/holder.rs b/bob-backend/src/pearl/holder.rs index 5361c4ac..799abab5 100644 --- a/bob-backend/src/pearl/holder.rs +++ b/bob-backend/src/pearl/holder.rs @@ -430,14 +430,11 @@ impl Holder { } async fn init_holder(&self) -> AnyResult<Storage<Key>> { - let f = || Utils::check_or_create_directory(&self.inner.disk_path); - self.inner.config - .try_multiple_times_async( - f, - &format!("cannot check path: {:?}", self.inner.disk_path), - self.inner.config.fail_retry_timeout(), - ) - .await?; + Utils::check_or_create_directory(&self.inner.disk_path).await + .map_err(|err| { + error!("cannot create holder dir: {:?}. Error: {:?}", self.inner.disk_path, err); + err + } )?; self.inner.config .try_multiple_times_async( diff --git a/bob-backend/src/pearl/settings.rs b/bob-backend/src/pearl/settings.rs index 8af4b8a5..62e8d7da 100644 --- a/bob-backend/src/pearl/settings.rs +++ b/bob-backend/src/pearl/settings.rs @@ -158,7 +158,7 @@ impl Settings { ) -> BackendResult<Group> { let path = self.alien_path(vdisk_id, &remote_node_name); - Utils::check_or_create_directory(&path).await?; + Utils::check_or_create_all_directories(&path, Some(&self.alien_folder)).await?; let disk_name = self .config @@ -178,8 +178,6 @@ impl Settings { } pub async fn get_all_subdirectories(path: &Path) -> BackendResult<Vec<DirEntry>> { - Utils::check_or_create_directory(path).await?; - let mut dir = read_dir(path).await.map_err(|e| { let msg = format!("couldn't process path: {:?}, error: {:?} ", path, e); error!("{}", msg); diff --git a/bob-backend/src/pearl/tests.rs b/bob-backend/src/pearl/tests.rs index c6a33e23..75c2898e 100755 --- a/bob-backend/src/pearl/tests.rs +++ b/bob-backend/src/pearl/tests.rs @@ -20,6 +20,8 @@ async fn create_backend(node_config: &str, cluster_config: &str) -> BackendResul let node = NodeConfig::get_from_string(node_config, &cluster).unwrap(); debug!("node: {:?}", node); + let disk = node.disks().lock().expect("lock error")[0].path().to_owned(); + std::fs::create_dir_all(&disk).expect("dir creation error"); let mapper = Arc::new(Virtual::new(&node, &cluster)); debug!("mapper: {:?}", mapper); PearlBackend::new(mapper, &node).await diff --git a/bob-backend/src/pearl/utils.rs b/bob-backend/src/pearl/utils.rs index 72b1569c..a476b712 100755 --- a/bob-backend/src/pearl/utils.rs +++ b/bob-backend/src/pearl/utils.rs @@ -21,26 +21,75 @@ impl StartTimestampConfig { } impl Utils { + /// Creates directory. Parent directory should exist pub async fn check_or_create_directory(path: &Path) -> BackendResult<()> { + if path.exists() { + trace!("directory: {} exists", path.display()); + return Ok(()); + } + + let dir = path + .to_str() + .ok_or_else(|| Error::storage("invalid some path, check vdisk or disk names"))?; + + match tokio::fs::create_dir(&path).await { + Ok(_) => { + info!("dir created: {}", path.display()); + Ok(()) + }, + Err(err) => { + match err.kind() { + IOErrorKind::AlreadyExists => { + trace!("directory: {} exists", path.display()); + Ok(()) + }, + IOErrorKind::NotFound => { + Err(Error::storage(format!("cannot create directory, because part of the path is not exist: '{}', error: {}", dir, err))) + }, + IOErrorKind::PermissionDenied | IOErrorKind::Other => { + Err(Error::possible_disk_disconnection()) + }, + _ => { + Err(Error::storage(format!("cannot create directory: '{}', error: {}", dir, err))) + } + } + } + } + } + + /// Creates directory and all its parents. If `base_path` is specified, then it should exist before creation started + pub async fn check_or_create_all_directories(path: &Path, base_path: Option<&Path>) -> BackendResult<()> { if path.exists() { trace!("directory: {:?} exists", path); - } else { - let dir = path - .to_str() - .ok_or_else(|| Error::storage("invalid some path, check vdisk or disk names"))?; - - create_dir_all(&path) - .await - .map(|_| info!("create directory: {}", dir)) - .map_err(|e| match e.kind() { + return Ok(()); + } + + if let Some(base_path) = base_path { + if !base_path.exists() { + return Err(Error::storage(format!("cannot create directory, because base path is not exist. Dir: '{}', Base path: '{}'", path.display(), base_path.display()))); + } + } + + let dir = path + .to_str() + .ok_or_else(|| Error::storage("invalid some path, check vdisk or disk names"))?; + + match tokio::fs::create_dir_all(&path).await { + Ok(_) => { + info!("dir created: {}", path.display()); + Ok(()) + }, + Err(err) => { + match err.kind() { IOErrorKind::PermissionDenied | IOErrorKind::Other => { - Error::possible_disk_disconnection() + Err(Error::possible_disk_disconnection()) + }, + _ => { + Err(Error::storage(format!("cannot create directory: {}, error: {}", dir, err))) } - _ => Error::storage(format!("cannot create directory: {}, error: {}", dir, e)), - })?; - info!("dir created: {}", path.display()); + } + } } - Ok(()) } pub async fn drop_pearl_lock_file(path: &Path) -> BackendResult<()> { diff --git a/bob/src/hw_metrics_collector.rs b/bob/src/hw_metrics_collector.rs index 8e4c11cb..5be82a02 100644 --- a/bob/src/hw_metrics_collector.rs +++ b/bob/src/hw_metrics_collector.rs @@ -37,19 +37,30 @@ impl HWMetricsCollector { } fn collect_used_disks(disks: &[DiskPath]) -> HashMap<PathBuf, DiskName> { - System::new_all() + System::new_with_specifics(RefreshKind::new().with_disks_list().with_disks()) .disks() .iter() .filter_map(|d| { let path = d.mount_point(); + let path_metadata = match path.metadata() { + Ok(meta) => meta, + Err(err) => { + warn!("Can't get metadata from OS for disk '{}' returned by 'sysinfo': {:?}", path.display(), err); + return None; + } + }; disks .iter() .find(move |dp| { - let dp_md = Path::new(dp.path()) - .metadata() - .expect("Can't get metadata from OS"); - let p_md = path.metadata().expect("Can't get metadata from OS"); - p_md.dev() == dp_md.dev() + match Path::new(dp.path()).metadata() { + Ok(dp_metadata) => { + path_metadata.dev() == dp_metadata.dev() + }, + Err(err) => { + warn!("Can't get metadata from OS for disk '{}' specified in cluster config: {:?}", dp.path(), err); + false + } + } }) .map(|config_disk| { let diskpath = path.to_str().expect("Not UTF-8").to_owned();