Skip to content

Commit

Permalink
bug fix to make sure the caches are not in one big flat file
Browse files Browse the repository at this point in the history
  • Loading branch information
mwang87 committed Aug 16, 2023
1 parent 63cbbe7 commit 21353ac
Showing 1 changed file with 7 additions and 1 deletion.
8 changes: 7 additions & 1 deletion massql/msql_fileloading.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,9 @@ def _determine_cache_filename_prefix(input_filename, cache_dir=None, cache_file=
namespace = uuid.UUID('6ba7b810-9dad-11d1-80b4-00c04fd430c8')
hashed_identifier = str(uuid.uuid3(namespace, "{}:{}".format(input_filename, input_file_size))).replace("-", "")

cache_filename = os.path.join(cache_dir, hashed_identifier)
hash_folder = hashed_identifier[:2]

cache_filename = os.path.join(cache_dir, hash_folder, hashed_identifier)
elif cache_file is not None:
# Here we assume that the cache_file is a full path but without the extensions
cache_filename = cache_file
Expand Down Expand Up @@ -123,6 +125,10 @@ def load_data(input_filename, cache=None, cache_dir=None, cache_file=None):
if cache == "feather":
ms1_filename, ms2_filename = _determine_feather_cache_filename(input_filename, cache_dir=cache_dir, cache_file=cache_file)

# let's make sure the folder exists for the filenames
if not os.path.exists(os.path.dirname(ms1_filename)):
os.makedirs(os.path.dirname(ms1_filename))

if not (os.path.exists(ms1_filename) or os.path.exists(ms2_filename)):
try:
ms1_df.to_feather(ms1_filename)
Expand Down

0 comments on commit 21353ac

Please sign in to comment.