Skip to content

Commit

Permalink
fix: delete temporary folders locally
Browse files (browse the repository at this point in the history)
  • Loading branch information
phueb committed Mar 16, 2021
1 parent 518d8a3 commit f719d06
Show file tree
Hide file tree
Showing 4 changed files with 15 additions and 30 deletions.
9 changes: 5 additions & 4 deletions ludwig/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -260,10 +260,11 @@ def submit():
if namespace.local or namespace.isolated:
series_list = user_job.main(job.param2val)
save_job_files(job.param2val, series_list, runs_path)
# remove old parents of save_paths - these should always be empty
for p in cwd.glob('param*'):
print(f'Removing {p}')
shutil.rmtree(p)

# temporary runs folder auto-created with name = {project_name}_runs must be removed
path_tmp = cwd / f'{src_path.name}_runs'
shutil.rmtree(path_tmp)
print(f'Removed temporary directory {path_tmp}')

# if running on Ludwig, save worker instructions to shared drive
else:
Expand Down
17 changes: 10 additions & 7 deletions ludwig/run.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,34 +19,37 @@ def save_job_files(param2val: Dict[str, Any],
if not series_list:
print('WARNING: Job did not return any results')

# save series_list
# job_path refers to local directory if --isolated but not --local
job_path = runs_path / param2val['param_name'] / param2val['job_name']
if not job_path.exists():
job_path.mkdir(parents=True)

# save series_list
for series in series_list:
if not isinstance(series, pd.Series):
print('WARNING: Object returned by job must be a pandas.Series.')
continue
raise TypeError('Object returned by job must be a pandas.Series.')
if series.name is None:
raise AttributeError('Each pandas.Series returned by job must have attribute name refer to unique string.')
with (job_path / '{}.csv'.format(series.name)).open('w') as f:
series.to_csv(f, index=True, header=[series.name]) # cannot name the index with "header" arg
print(f'Saved results to {job_path}')

# save param2val
param2val_path = runs_path / param2val['param_name'] / 'param2val.yaml'
print(f'Saving param2val to {param2val_path}')
if not param2val_path.exists():
param2val_path.parent.mkdir(exist_ok=True)
param2val['job_name'] = None
with param2val_path.open('w', encoding='utf8') as f:
yaml.dump(param2val, f, default_flow_style=False, allow_unicode=True)
print(f'Saved param2val to {param2val_path}')

# move contents of save_path to shared drive
# move contents of save_path to job_path (can be local or remote)
save_path = Path(param2val['save_path'])
src = str(save_path)
dst = str(job_path)
if save_path.exists(): # user may not create a directory at save path
print(f'Moving {src} to shared drive')
shutil.move(src, dst) # src is no longer available afterwards
print('Done moving')
print(f'Moved contents of save_path to {job_path}')


def run_job_on_ludwig_worker(param2val):
Expand Down
18 changes: 0 additions & 18 deletions ludwig/uploader.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,6 @@
"""
from pathlib import Path
import pysftp
import platform
import psutil
import pickle
from typing import Union, Optional

Expand Down Expand Up @@ -37,18 +35,6 @@ def __init__(self,
print('WARNING: Skipping hostkey checking.')
self.cnopts.hostkeys = None

def check_disk_space(self, verbose=False):
    """Raise if the disk holding the project's parent directory is too full.

    The check only runs when ``platform.system()`` reports Linux; on any
    other system the method returns without doing anything.

    Args:
        verbose: when true, report the measured percentage via
            ``print_ludwig``.

    Raises:
        RuntimeError: if the percentage used exceeds
            ``configs.Remote.disk_max_percent``.
    """
    if platform.system() not in {'Linux'}:
        return

    parent_dir = self.project_path.parent
    # index 3 of the psutil disk-usage tuple is the percentage used
    percent_used = psutil.disk_usage(str(parent_dir))[3]
    if verbose:
        print_ludwig('Percent Disk Space used at {}: {}'.format(parent_dir, percent_used))

    limit = configs.Remote.disk_max_percent
    if percent_used > limit:
        raise RuntimeError('Disk space usage > {}.'.format(limit))

def to_disk(self,
job: Job,
worker: Optional[str] = None,
Expand Down Expand Up @@ -92,8 +78,6 @@ def start_jobs(self,
assert self.project_name.lower() == self.src_name # TODO what about when src name must be different?
# this must be true because in run.py project_name is converted to src_name

self.check_disk_space()

# -------------------------------------- prepare paths

if not self.project_path.exists():
Expand Down Expand Up @@ -143,8 +127,6 @@ def kill_jobs(self,
assert self.project_name.lower() == self.src_name # TODO what about when src name must be different?
# this must be true because in run.py project_name is converted to src_name

self.check_disk_space()

# -------------------------------------- prepare paths

if not self.project_path.exists():
Expand Down
1 change: 0 additions & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,6 @@
'console_scripts': [
'ludwig=ludwig.__main__:submit',
'ludwig-status=ludwig.__main__:status',
'ludwig-add-ssh-config=ludwig.__main__:add_ssh_config'
]
}
)

0 comments on commit f719d06

Please sign in to comment.