diff --git a/avalanche/evaluation/metrics/disk_usage.py b/avalanche/evaluation/metrics/disk_usage.py index a2a92149e..c0cbd5046 100644 --- a/avalanche/evaluation/metrics/disk_usage.py +++ b/avalanche/evaluation/metrics/disk_usage.py @@ -10,6 +10,9 @@ ################################################################################ import os +import time +from sys import platform +import subprocess from pathlib import Path from typing import Union, Sequence, List, Optional @@ -45,6 +48,8 @@ def __init__( paths_to_monitor = [paths_to_monitor] self._paths_to_monitor: List[str] = [str(p) for p in paths_to_monitor] + # this is used to avoid sending multiple warnings + self._warning_sent = False self.total_usage: float = 0.0 @@ -57,7 +62,7 @@ def update(self): dirs_size = 0.0 for directory in self._paths_to_monitor: - dirs_size += DiskUsage.get_dir_size(directory) + dirs_size += self.get_dir_size(directory) self.total_usage = dirs_size @@ -81,8 +86,7 @@ def reset(self) -> None: """ self.total_usage = 0 - @staticmethod - def get_dir_size(path: str) -> float: + def get_dir_size(self, path) -> float: """ Obtains the size of the given directory, in KiB. @@ -90,15 +94,36 @@ def get_dir_size(path: str) -> float: :return: A float value describing the size (in KiB) of the directory as the sum of all its elements. 
""" + + start = time.time() total_size = 0.0 - for dirpath, dirnames, filenames in os.walk(path): - for f in filenames: - fp = os.path.join(dirpath, f) - # skip if it is symbolic link - if not os.path.islink(fp): - # in KB - s = os.path.getsize(fp) / 1024 - total_size += s + + if platform == "linux" or platform == "linux2": + total_size = ( + float( + subprocess.check_output(["du", "-sb", path]) + .split()[0] + .decode("utf-8") + ) + / 1024 + ) + else: + for dirpath, dirnames, filenames in os.walk(path): + for f in filenames: + fp = os.path.join(dirpath, f) + # skip if it is symbolic link + if not os.path.islink(fp): + # in KiB + s = os.path.getsize(fp) / 1024 + total_size += s + + end = time.time() + elapsed_t = end - start + # if we wait for more than 0.5 sec. + if elapsed_t > 0.5 and self._warning_sent is False: + print(f"\n\nWARNING: Time to get size of {path}: {elapsed_t}") + print("Are you sure you want to monitor this directory?\n") + self._warning_sent = True return total_size @@ -132,10 +157,7 @@ def __init__(self, paths_to_monitor): Creates an instance of the minibatch Disk usage metric. 
""" super(ExperienceDiskUsage, self).__init__( - paths_to_monitor, - reset_at="experience", - emit_at="experience", - mode="eval", + paths_to_monitor, reset_at="experience", emit_at="experience", mode="eval" ) def __str__(self): @@ -214,7 +233,7 @@ def disk_usage_metrics( minibatch=False, epoch=False, experience=False, - stream=False + stream=False, ) -> List[DiskPluginMetric]: """ Helper method that can be used to obtain the desired set of diff --git a/tests/evaluation/test_disk_usage.py b/tests/evaluation/test_disk_usage.py new file mode 100644 index 000000000..dbd6dfeff --- /dev/null +++ b/tests/evaluation/test_disk_usage.py @@ -0,0 +1,17 @@ +""" Disk Usage Metric Test""" + +import unittest + +from avalanche.evaluation.metrics import DiskUsage + + +class DiskUsageTests(unittest.TestCase): + def test_basic(self): + """just checking that directory size is computed without errors.""" + + disk = DiskUsage() + disk.get_dir_size(".") + + +if __name__ == "__main__": + unittest.main()