From b0864c6ff6f2afc626dd1d69ddea5eca008d4e17 Mon Sep 17 00:00:00 2001 From: Hunter McGushion Date: Wed, 16 Oct 2019 16:18:22 -0700 Subject: [PATCH 1/8] Remove outdated `ContinueRemap` and `visit_feature_engineer` utilities --- hyperparameter_hunter/i_o/exceptions.py | 5 -- .../utils/optimization_utils.py | 54 ------------------- 2 files changed, 59 deletions(-) diff --git a/hyperparameter_hunter/i_o/exceptions.py b/hyperparameter_hunter/i_o/exceptions.py index 964adb62..2100a438 100644 --- a/hyperparameter_hunter/i_o/exceptions.py +++ b/hyperparameter_hunter/i_o/exceptions.py @@ -130,11 +130,6 @@ def __init__(self, candidate, template): super(IncompatibleCandidateError, self).__init__(message) -class ContinueRemap(Exception): - def __str__(self): - return "Just keep doing what you were doing" - - ################################################## # Deprecation Warnings ################################################## diff --git a/hyperparameter_hunter/utils/optimization_utils.py b/hyperparameter_hunter/utils/optimization_utils.py index deb9e2ba..5f444c45 100644 --- a/hyperparameter_hunter/utils/optimization_utils.py +++ b/hyperparameter_hunter/utils/optimization_utils.py @@ -10,7 +10,6 @@ ################################################## # Import Own Assets ################################################## -from hyperparameter_hunter.i_o.exceptions import ContinueRemap from hyperparameter_hunter.keys.hashing import make_hash_sha256 from hyperparameter_hunter.space.dimensions import Real, Integer, Categorical, RejectedOptional from hyperparameter_hunter.utils.boltons_utils import get_path, remap @@ -165,59 +164,6 @@ def does_fit_in_space(root, space): return dimension_subset(root, space.names()) in space -def visit_feature_engineer(path, key, value): - """Helper to be used within a `visit` function intended for a `remap`-like function - - Parameters - ---------- - path: Tuple - The path of keys that leads to `key` - key: String - The parameter name - value: Object - The value of the parameter `key` - - Returns - ------- - False if the value represents a dataset, or tuple of (`key`, ). If neither of - these are returned, a `ContinueRemap` exception is raised - - Raises - ------ - ContinueRemap - If a value is not returned by `visit_function_engineer`. For proper functioning, this raised - `ContinueRemap` is assumed to be handled by the calling `visit` function. Usually, the - `except` block for `ContinueRemap` will simply continue execution of `visit` - - Examples - -------- - >>> visit_feature_engineer(("feature_engineer",), "datasets", dict()) - False - >>> visit_feature_engineer(("feature_engineer", "steps"), "f", lambda _: _) # pytest: +ELLIPSIS - ('f', '...') - >>> visit_feature_engineer(("feature_engineer", "steps"), "foo", lambda _: _) - Traceback (most recent call last): - File "optimization_utils.py", line ?, in visit_feature_engineer - hyperparameter_hunter.i_o.exceptions.ContinueRemap: Just keep doing what you were doing - >>> visit_feature_engineer(("feature_engineer",), "foo", dict()) - Traceback (most recent call last): - File "optimization_utils.py", line ?, in visit_feature_engineer - hyperparameter_hunter.i_o.exceptions.ContinueRemap: Just keep doing what you were doing - >>> visit_feature_engineer(("foo",), "bar", dict()) - Traceback (most recent call last): - File "optimization_utils.py", line ?, in visit_feature_engineer - hyperparameter_hunter.i_o.exceptions.ContinueRemap: Just keep doing what you were doing""" - if path and path[0] == "feature_engineer": - # Drop dataset hashes - if key in ("datasets", "original_hashes", "updated_hashes") and isinstance(value, dict): - return False - # Ensure `EngineerStep.f` is hashed - with suppress(IndexError): - if path[1] == "steps" and key == "f" and callable(value): - return key, make_hash_sha256(value) - raise ContinueRemap - - def get_choice_dimensions(params, iter_attrs=None): """List all elements in the nested structure `params` that are hyperparameter space choices From 2200e06634b6beb9ebae288735ad0919ff56c931 Mon Sep 17 00:00:00 2001 From: Hunter McGushion Date: Fri, 25 Oct 2019 18:22:10 -0700 Subject: [PATCH 2/8] Handle .yaml and .json Descriptions in `has_experiment_result_file` - Originally, only .json files were considered - Now, three extensions are checked: ".yaml", ".yml", and ".json" (in that order) --- hyperparameter_hunter/i_o/result_reader.py | 32 +++++++++++----------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/hyperparameter_hunter/i_o/result_reader.py b/hyperparameter_hunter/i_o/result_reader.py index d9e42fad..4f5f5c28 100644 --- a/hyperparameter_hunter/i_o/result_reader.py +++ b/hyperparameter_hunter/i_o/result_reader.py @@ -929,14 +929,14 @@ def _visit(path, key, value): ################################################## # Utilities ################################################## -def has_experiment_result_file(results_dir, experiment_id, result_type=None): - """Check if the specified result files exist in `results_dir` for Experiment `experiment_id` +def has_experiment_result_file(results_dir, exp_id, result_type=None): + """Check if the specified result files exist in `results_dir` for Experiment `exp_id` Parameters ---------- results_dir: String HyperparameterHunterAssets directory in which to search for Experiment result files - experiment_id: String, or BaseExperiment + exp_id: String, or BaseExperiment ID of the Experiment whose result files should be searched for in `results_dir`. If not string, should be an instance of a descendant of :class:`~hyperparameter_hunter.experiments.BaseExperiment` with an "experiment_id" attribute @@ -953,8 +953,13 @@ def has_experiment_result_file(results_dir, experiment_id, result_type=None): ------- Boolean True if all result files specified by `result_type` exist in `results_dir` for the - Experiment specified by `experiment_id`. Else, False""" - experiment_id = experiment_id if isinstance(experiment_id, str) else experiment_id.experiment_id + Experiment specified by `exp_id`. Else, False""" + exp_id = exp_id if isinstance(exp_id, str) else exp_id.experiment_id + + if results_dir.endswith("HyperparameterHunterAssets"): + exp_dir = Path(results_dir) / "Experiments" + else: + exp_dir = Path(results_dir) / "HyperparameterHunterAssets" / "Experiments" #################### Format `result_type` #################### if not result_type: @@ -972,25 +977,20 @@ def has_experiment_result_file(results_dir, experiment_id, result_type=None): result_type = [result_type] for subdir in result_type: - #################### Select Result File Suffix #################### + #################### Select Result File Suffixes #################### if subdir == "Descriptions": - suffix = ".json" + suffixes = (".yaml", ".yml", ".json") elif subdir == "Heartbeats": - suffix = ".log" + suffixes = (".log",) elif subdir == "ScriptBackups": - suffix = ".py" + suffixes = (".py",) elif subdir.startswith("Predictions"): - suffix = ".csv" + suffixes = (".csv",) else: raise ValueError(f"Cannot resolve suffix for subdir `result_type`: {subdir}") #################### Check "Experiments" Directory #################### - if results_dir.endswith("HyperparameterHunterAssets"): - experiments_dir = Path(results_dir) / "Experiments" - else: - experiments_dir = Path(results_dir) / "HyperparameterHunterAssets" / "Experiments" - - if not (experiments_dir / subdir / f"{experiment_id}{suffix}").exists(): + if not any((exp_dir / subdir / f"{exp_id}{suffix}").exists() for suffix in suffixes): return False return True From 243656dee4a7b86ca6547f2860c020786897f559 Mon Sep 17 00:00:00 2001 From: Hunter McGushion Date: Sat, 26 Oct 2019 15:45:03 -0700 Subject: [PATCH 3/8] Add YAML read/write utilities - Add `ruamel.yaml` to dependencies --- hyperparameter_hunter/utils/file_utils.py | 89 ++++++++++++++++++++++- setup.py | 1 + 2 files changed, 89 insertions(+), 1 deletion(-) diff --git a/hyperparameter_hunter/utils/file_utils.py b/hyperparameter_hunter/utils/file_utils.py index c407499c..06fd7d51 100644 --- a/hyperparameter_hunter/utils/file_utils.py +++ b/hyperparameter_hunter/utils/file_utils.py @@ -13,8 +13,10 @@ import os import os.path import pandas as pd +from pathlib import Path +from ruamel.yaml import YAML import simplejson as json -from typing import Union +from typing import Any, List, Tuple, Union import wrapt @@ -181,6 +183,91 @@ def add_to_json(file_path, data_to_add, key=None, condition=None, default=None, write_json(file_path, original_data) +################################################## +# YAML File Functions +################################################## +# Extra Representers used in the default HH Ruamel YAML instance +_RUAMEL_REPRESENTERS: List[Tuple[type, callable]] = [ + (np.ndarray, lambda dumper, data: dumper.represent_list(data.tolist())), + (np.float64, lambda dumper, data: dumper.represent_float(float(data))), + (np.int64, lambda dumper, data: dumper.represent_int(int(data))), + (tuple, lambda dumper, data: dumper.represent_sequence("tag:yaml.org,2002:python/tuple", data)), + (str, lambda dumper, data: dumper.represent_scalar("tag:yaml.org,2002:str", data, style='"')), +] +# Extra Constructors used in the default HH Ruamel YAML instance +_RUAMEL_CONSTRUCTORS: List[Tuple[str, callable]] = [ + ("tag:yaml.org,2002:python/tuple", lambda loader, node: tuple(loader.construct_sequence(node))) +] + + +def get_ruamel_instance() -> YAML: + """Get the default :class:`ruamel.yaml.YAML` instance used for dumping/loading YAML files + + Returns + ------- + yml: YAML + :class:`ruamel.yaml.YAML` instance configured for HyperparameterHunter, outfitted with + additional Ruamel Representers to properly format non-standard data types""" + #################### Prepare Ruamel YAML Instance #################### + yml = YAML(typ="safe") + yml.default_flow_style = None + yml.sort_base_mapping_type_on_output = False # False retains original mapping order + yml.top_level_colon_align = True # Make it easier to see top-level elements + yml.width = 100 + + #################### Add Auxiliary Ruamel Representers/Constructors #################### + for (data_type, representer) in _RUAMEL_REPRESENTERS: + yml.representer.add_representer(data_type, representer) + + for (tag, constructor) in _RUAMEL_CONSTRUCTORS: + yml.constructor.add_constructor(tag, constructor) + + return yml + + +def read_yaml(file_path: str, yml: YAML = None) -> object: + """Get the contents of the .yaml file located at `file_path` + + Parameters + ---------- + file_path: String + Path to the .yaml file to be read + yml: YAML (optional) + :class:`ruamel.yaml.YAML` instance used to load data from `file_path`. If not given, the + result of :func:`get_ruamel_instance` is used + + Returns + ------- + Object + Contents of the .yaml file located at `file_path`""" + yml = get_ruamel_instance() if yml is None else yml + return yml.load(file_path) + + +def write_yaml(file_path: Union[str, Path], data: Any, yml: YAML = None, do_make_dirs: bool = True): + """Write `data` to the YAML file specified by `file_path` + + Parameters + ---------- + file_path: String, or Path + Target .yaml file path to which `data` will be written + data: Object + Content to save at the .yaml file given by `file_path` + yml: YAML (optional) + :class:`ruamel.yaml.YAML` instance used to dump `data` to `file_path`. If not given, the + result of :func:`get_ruamel_instance` is used + do_make_dirs: Boolean, default=True + If True, create any parent directories in `file_path` that don't already exist""" + file_path = Path(file_path) + yml = get_ruamel_instance() if yml is None else yml + + if do_make_dirs: + make_dirs(file_path.parent, exist_ok=True) + + with open(file_path, "w+") as f: + yml.dump(data, f) + + ################################################## # General File Functions ################################################## diff --git a/setup.py b/setup.py index 7b2a26ad..fc7f6e1d 100644 --- a/setup.py +++ b/setup.py @@ -47,6 +47,7 @@ def readme(): "nbformat", "numpy", "pandas", + "ruamel.yaml", "scikit-learn", "scikit-optimize", "scipy", From e2ac0703a3d131ea4135157887798aff79086b78 Mon Sep 17 00:00:00 2001 From: Hunter McGushion Date: Sat, 26 Oct 2019 20:23:34 -0700 Subject: [PATCH 4/8] Refactor file writing utils to use `RetryMakeDirs` - Fix bug in `read_yaml` because `yml.load` expected a `pathlib.Path` instance, rather than a string - Minor refactoring, and moved utility definitions to put general utilities at top (most importantly, `RetryMakeDirs`) --- hyperparameter_hunter/utils/file_utils.py | 373 +++++++++++----------- 1 file changed, 181 insertions(+), 192 deletions(-) diff --git a/hyperparameter_hunter/utils/file_utils.py b/hyperparameter_hunter/utils/file_utils.py index 06fd7d51..3f881084 100644 --- a/hyperparameter_hunter/utils/file_utils.py +++ b/hyperparameter_hunter/utils/file_utils.py @@ -21,7 +21,162 @@ ################################################## -# JSON File Functions +# General File Utilities/Decorators +################################################## +def make_dirs(name, mode=0o0777, exist_ok=False): + """Permissive version of `os.makedirs` that gives full permissions by default + + Parameters + ---------- + name: Str + Path/name of directory to create. Will make intermediate-level directories needed to contain + the leaf directory + mode: Number, default=0o0777 + File permission bits for creating the leaf directory + exist_ok: Boolean, default=False + If False, an `OSError` is raised if the directory targeted by `name` already exists""" + old_mask = os.umask(000) + os.makedirs(name, mode=mode, exist_ok=exist_ok) + os.umask(old_mask) + + +def clear_file(file_path): + """Erase the contents of the file located at `file_path` + + Parameters + ---------- + file_path: String + The path of the file whose contents should be cleared out""" + clear_target = open(file_path, "w") + clear_target.truncate() + clear_target.close() + + +class RetryMakeDirs(object): + def __init__(self): + """Execute decorated callable, but if `OSError` is raised, call :func:`make_dirs` on the + directory specified by the exception, then recall the decorated callable again. This also + works with operations on files, in which case the file's parent directories are created + + Examples + -------- + >>> from tempfile import TemporaryDirectory + >>> with TemporaryDirectory(dir="") as d: # doctest: +ELLIPSIS + ... def f_0(): + ... os.mkdir(f"{d}/nonexistent_dir/subdir") + ... f_0() + Traceback (most recent call last): + File "file_utils.py", line ?, in f_0 + FileNotFoundError: [Errno 2] No such file or directory... + >>> with TemporaryDirectory(dir="") as d: + ... @RetryMakeDirs() + ... def f_1(): + ... os.mkdir(f"{d}/nonexistent_dir/subdir") + ... f_1() + """ + + @wrapt.decorator + def __call__(self, wrapped, instance, args, kwargs): + try: + return wrapped(*args, **kwargs) + except OSError as _ex: + # TODO: Add ability to check `kwargs` for value dictating whether to call `make_dirs` + # - Provide name or index (if arg) of value to check in `RetryMakeDirs.__init__` + if _ex.filename: + make_dirs(os.path.split(_ex.filename)[0], exist_ok=True) + return wrapped(*args, **kwargs) + + +class ParametersFromFile(object): + def __init__(self, key: Union[str, int] = None, file: str = None, verbose: bool = False): + """Decorator to specify a .json file that defines default values for the decorated callable. + The location of the file can either be specified explicitly with `file`, or it can be + retrieved when the decorated callable is called through an argument key/index given by `key` + + Parameters + ---------- + key: String, or integer, default=None + Used only if `file` is not also given. Determines a value for `file` based on the + parameters passed to the decorated callable. If string, represents a key in `kwargs` + passed to :meth:`ParametersFromFile.__call__`. In other words, this names a keyword + argument passed to the decorated callable. If `key` is integer, it represents an index + in `args` passed to :meth:`ParametersFromFile.__call__`, the value at which specifies a + filepath containing the default parameters dict to use + file: String, default=None + If not None, `key` will be ignored, and `file` will be used as the filepath from which + to read the dict of default parameters for the decorated callable + verbose: Boolean, default=False + If True, will log messages when invalid keys are found in the parameters file, and when + keys are set to the default values in the parameters file. Else, logging is silenced + + Notes + ----- + The order of precedence for determining the value of each parameter is as follows, with + items at the top having the highest priority, and deferring only to the items below if + their own value is not given: + + * 1)parameters explicitly passed to the callable decorated by `ParametersFromFile`, + * 2)parameters in the .json file denoted by `key` or `file`, + * 3)parameter defaults defined in the signature of the decorated callable + + Examples + -------- + >>> from tempfile import TemporaryDirectory + >>> with TemporaryDirectory(dir="") as d: + ... write_json(f"{d}/config.json", dict(b="I came from config.json", c="Me too!")) + ... @ParametersFromFile(file=f"{d}/config.json") + ... def f_0(a="first_a", b="first_b", c="first_c"): + ... print(f"{a} ... {b} ... {c}") + ... @ParametersFromFile(key="config_file") + ... def f_1(a="second_a", b="second_b", c="second_c", config_file=None): + ... print(f"{a} ... {b} ... {c}") + ... f_0(c="Hello, there") + ... f_0(b="General Kenobi") + ... f_1() + ... f_1(a="Generic prequel meme", config_file=f"{d}/config.json") + ... f_1(c="This is where the fun begins", config_file=None) + first_a ... I came from config.json ... Hello, there + first_a ... General Kenobi ... Me too! + second_a ... second_b ... second_c + Generic prequel meme ... I came from config.json ... Me too! + second_a ... second_b ... This is where the fun begins""" + self.key = key + self.file = file + self.verbose = verbose + + @wrapt.decorator + def __call__(self, wrapped, instance, args, kwargs): + file = self.file + file_params = {} + + #################### Locate Parameters File #################### + if not file and self.key is not None: + with suppress(TypeError): + file = kwargs.get(self.key, None) or args[self.key] + + if file: # If `file=None`, continue with empty dict of `file_params` + file_params = read_json(file) + + if not isinstance(file_params, dict): + raise TypeError("{} must have dict, not {}".format(file, file_params)) + + #################### Check Valid Parameters for `wrapped` #################### + ok_keys = [k for k, v in signature(wrapped).parameters.items() if v.kind == v.KEYWORD_ONLY] + + for k, v in file_params.items(): + if k not in ok_keys: + if self.verbose: + G.warn(f"Invalid key ({k}) in user parameters file: {file}") + if k not in kwargs: + kwargs[k] = v + if self.verbose: + G.debug(f"Parameter `{k}` set to user default in parameters file: '{file}'") + + return wrapped(*args, **kwargs) + + +################################################## +# JSON File Utilities ################################################## def default_json_write(obj): """Convert values that are not JSON-friendly to a more acceptable type @@ -29,12 +184,12 @@ def default_json_write(obj): Parameters ---------- obj: Object - The object that is expected to be of a type that is incompatible with JSON files + Object that is expected to be of a type that is incompatible with JSON files Returns ------- Object - The value of `obj` after being cast to a type accepted by JSON + Value of `obj` after being cast to a type accepted by JSON Raises ------ @@ -94,45 +249,34 @@ def hook_json_read(obj): return obj -def write_json(file_path, data, do_clear=False): - """Write `data` to the JSON file specified by `file_path`, optionally clearing the file before - adding `data` - - Parameters - ---------- - file_path: String - The target .json file path to which `data` will be written - data: Object - The content to save at the .json file given by `file_path` - do_clear: Boolean, default=False - If True, the contents of the file at `file_path` will be cleared before saving `data`""" - if do_clear is True: - clear_file(file_path) - - with open(file_path, "w") as f: - json.dump(data, f, default=default_json_write, tuple_as_array=False) - - -def read_json(file_path, np_arr=False): +def read_json(file_path: str) -> object: """Get the contents of the .json file located at `file_path` Parameters ---------- file_path: String - The path of the .json file to be read - np_arr: Boolean, default=False - If True, the contents read from `file_path` will be cast to a numpy array before returning + Path to the .json file to be read Returns ------- content: Object The contents of the .json file located at `file_path`""" content = json.loads(open(file_path).read(), object_hook=hook_json_read) + return content - if np_arr is True: - return np.array(content) - return content +@RetryMakeDirs() +def write_json(file_path: str, data: Any): + """Write `data` to the JSON file specified by `file_path` + + Parameters + ---------- + file_path: String + Target .json file path to which `data` will be written + data: Object + Content to save in the .json file given by `file_path`""" + with open(file_path, "w") as f: + json.dump(data, f, default=default_json_write, tuple_as_array=False) def add_to_json(file_path, data_to_add, key=None, condition=None, default=None, append_value=False): @@ -184,7 +328,7 @@ def add_to_json(file_path, data_to_add, key=None, condition=None, default=None, ################################################## -# YAML File Functions +# YAML File Utilities ################################################## # Extra Representers used in the default HH Ruamel YAML instance _RUAMEL_REPRESENTERS: List[Tuple[type, callable]] = [ @@ -225,12 +369,12 @@ def get_ruamel_instance() -> YAML: return yml -def read_yaml(file_path: str, yml: YAML = None) -> object: +def read_yaml(file_path: Union[str, Path], yml: YAML = None) -> object: """Get the contents of the .yaml file located at `file_path` Parameters ---------- - file_path: String + file_path: String, or Path Path to the .yaml file to be read yml: YAML (optional) :class:`ruamel.yaml.YAML` instance used to load data from `file_path`. If not given, the @@ -240,11 +384,13 @@ def read_yaml(file_path: str, yml: YAML = None) -> object: ------- Object Contents of the .yaml file located at `file_path`""" + file_path = Path(file_path) yml = get_ruamel_instance() if yml is None else yml return yml.load(file_path) -def write_yaml(file_path: Union[str, Path], data: Any, yml: YAML = None, do_make_dirs: bool = True): +@RetryMakeDirs() +def write_yaml(file_path: Union[str, Path], data: Any, yml: YAML = None): """Write `data` to the YAML file specified by `file_path` Parameters @@ -252,174 +398,17 @@ def write_yaml(file_path: Union[str, Path], data: Any, yml: YAML = None, do_make file_path: String, or Path Target .yaml file path to which `data` will be written data: Object - Content to save at the .yaml file given by `file_path` + Content to save in the .yaml file given by `file_path` yml: YAML (optional) :class:`ruamel.yaml.YAML` instance used to dump `data` to `file_path`. If not given, the - result of :func:`get_ruamel_instance` is used - do_make_dirs: Boolean, default=True - If True, create any parent directories in `file_path` that don't already exist""" + result of :func:`get_ruamel_instance` is used""" file_path = Path(file_path) yml = get_ruamel_instance() if yml is None else yml - if do_make_dirs: - make_dirs(file_path.parent, exist_ok=True) - with open(file_path, "w+") as f: yml.dump(data, f) -################################################## -# General File Functions -################################################## -def make_dirs(name, mode=0o0777, exist_ok=False): - """Permissive version of `os.makedirs` that gives full permissions by default - - Parameters - ---------- - name: Str - Path/name of directory to create. Will make intermediate-level directories needed to contain - the leaf directory - mode: Number, default=0o0777 - File permission bits for creating the leaf directory - exist_ok: Boolean, default=False - If False, an `OSError` is raised if the directory targeted by `name` already exists""" - old_mask = os.umask(000) - os.makedirs(name, mode=mode, exist_ok=exist_ok) - os.umask(old_mask) - - -def clear_file(file_path): - """Erase the contents of the file located at `file_path` - - Parameters - ---------- - file_path: String - The path of the file whose contents should be cleared out""" - clear_target = open(file_path, "w") - clear_target.truncate() - clear_target.close() - - -class RetryMakeDirs(object): - def __init__(self): - """Execute decorated callable, but if `OSError` is raised, call :func:`make_dirs` on the - directory specified by the exception, then recall the decorated callable again - - Examples - -------- - >>> from tempfile import TemporaryDirectory - >>> with TemporaryDirectory(dir="") as d: # doctest: +ELLIPSIS - ... def f_0(): - ... os.mkdir(f"{d}/nonexistent_dir/subdir") - ... f_0() - Traceback (most recent call last): - File "file_utils.py", line ?, in f_0 - FileNotFoundError: [Errno 2] No such file or directory... - >>> with TemporaryDirectory(dir="") as d: - ... @RetryMakeDirs() - ... def f_1(): - ... os.mkdir(f"{d}/nonexistent_dir/subdir") - ... f_1() - """ - - @wrapt.decorator - def __call__(self, wrapped, instance, args, kwargs): - try: - return wrapped(*args, **kwargs) - except OSError as _ex: - if _ex.filename: - make_dirs(os.path.split(_ex.filename)[0], exist_ok=True) - return wrapped(*args, **kwargs) - - -class ParametersFromFile(object): - def __init__(self, key: Union[str, int] = None, file: str = None, verbose: bool = False): - """Decorator to specify a .json file that defines default values for the decorated callable. - The location of the file can either be specified explicitly with `file`, or it can be - retrieved when the decorated callable is called through an argument key/index given by `key` - - Parameters - ---------- - key: String, or integer, default=None - Used only if `file` is not also given. Determines a value for `file` based on the - parameters passed to the decorated callable. If string, represents a key in `kwargs` - passed to :meth:`ParametersFromFile.__call__`. In other words, this names a keyword - argument passed to the decorated callable. If `key` is integer, it represents an index - in `args` passed to :meth:`ParametersFromFile.__call__`, the value at which specifies a - filepath containing the default parameters dict to use - file: String, default=None - If not None, `key` will be ignored, and `file` will be used as the filepath from which - to read the dict of default parameters for the decorated callable - verbose: Boolean, default=False - If True, will log messages when invalid keys are found in the parameters file, and when - keys are set to the default values in the parameters file. Else, logging is silenced - - Notes - ----- - The order of precedence for determining the value of each parameter is as follows, with - items at the top having the highest priority, and deferring only to the items below if - their own value is not given: - - * 1)parameters explicitly passed to the callable decorated by `ParametersFromFile`, - * 2)parameters in the .json file denoted by `key` or `file`, - * 3)parameter defaults defined in the signature of the decorated callable - - Examples - -------- - >>> from tempfile import TemporaryDirectory - >>> with TemporaryDirectory(dir="") as d: - ... write_json(f"{d}/config.json", dict(b="I came from config.json", c="Me too!")) - ... @ParametersFromFile(file=f"{d}/config.json") - ... def f_0(a="first_a", b="first_b", c="first_c"): - ... print(f"{a} ... {b} ... {c}") - ... @ParametersFromFile(key="config_file") - ... def f_1(a="second_a", b="second_b", c="second_c", config_file=None): - ... print(f"{a} ... {b} ... {c}") - ... f_0(c="Hello, there") - ... f_0(b="General Kenobi") - ... f_1() - ... f_1(a="Generic prequel meme", config_file=f"{d}/config.json") - ... f_1(c="This is where the fun begins", config_file=None) - first_a ... I came from config.json ... Hello, there - first_a ... General Kenobi ... Me too! - second_a ... second_b ... second_c - Generic prequel meme ... I came from config.json ... Me too! - second_a ... second_b ... This is where the fun begins""" - self.key = key - self.file = file - self.verbose = verbose - - @wrapt.decorator - def __call__(self, wrapped, instance, args, kwargs): - file = self.file - file_params = {} - - #################### Locate Parameters File #################### - if not file and self.key is not None: - with suppress(TypeError): - file = kwargs.get(self.key, None) or args[self.key] - - if file: # If `file=None`, continue with empty dict of `file_params` - file_params = read_json(file) - - if not isinstance(file_params, dict): - raise TypeError("{} must have dict, not {}".format(file, file_params)) - - #################### Check Valid Parameters for `wrapped` #################### - ok_keys = [k for k, v in signature(wrapped).parameters.items() if v.kind == v.KEYWORD_ONLY] - - for k, v in file_params.items(): - if k not in ok_keys: - if self.verbose: - G.warn(f"Invalid key ({k}) in user parameters file: {file}") - if k not in kwargs: - kwargs[k] = v - if self.verbose: - G.debug(f"Parameter `{k}` set to user default in parameters file: '{file}'") - - return wrapped(*args, **kwargs) - - ################################################## # Display Utilities ################################################## From 920974775fd95573b54ad8db9bab0211f01a6309 Mon Sep 17 00:00:00 2001 From: Hunter McGushion Date: Sun, 27 Oct 2019 01:46:56 -0700 Subject: [PATCH 5/8] Default to YAML Experiment Description --- CHANGELOG.md | 9 ++ hyperparameter_hunter/i_o/recorders.py | 73 +++++++------- hyperparameter_hunter/i_o/result_reader.py | 11 ++- .../optimization/protocol_core.py | 2 +- hyperparameter_hunter/settings.py | 10 +- .../utils/optimization_utils.py | 40 +++++++- tests/test_space/test_space.py | 94 ++++++++++++++++++- 7 files changed, 194 insertions(+), 45 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 9c749d08..48f7f833 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,15 @@ ## [Unreleased] ### Features +* Save Experiment Description files in YAML (from JSON) + * Makes Description files easier for humans to read + * Fixes issues with certain hyperparameter types ignored by JSON, such as dicts with non-string + keys, and tuples + * If you hate the new YAML Descriptions and want to go back to JSON, you can set + `settings.G.description_format` to "json" + * Consider opening [an issue](https://github.com/HunterMcGushion/hyperparameter_hunter/issues), + telling me this was a dumb decision. Feedback is very much appreciated (honestly) + * Be warned that reverting to "json" means the above-noted issues could occur * Enabled optimization of tuple values via [`Categorical`](https://hyperparameter-hunter.readthedocs.io/en/stable/source/hyperparameter_hunter.space.html#hyperparameter_hunter.space.dimensions.Categorical) * This can be used with Keras to search over different `kernel_size` values for `Conv2D` or `pool_size` values for `MaxPooling2D`, for example: diff --git a/hyperparameter_hunter/i_o/recorders.py b/hyperparameter_hunter/i_o/recorders.py index cd982f53..85ae7d33 100644 --- a/hyperparameter_hunter/i_o/recorders.py +++ b/hyperparameter_hunter/i_o/recorders.py @@ -14,7 +14,7 @@ from hyperparameter_hunter.i_o.leaderboards import GlobalLeaderboard from hyperparameter_hunter.settings import G from hyperparameter_hunter.utils.file_utils import write_json, add_to_json, make_dirs, read_json -from hyperparameter_hunter.utils.file_utils import RetryMakeDirs +from hyperparameter_hunter.utils.file_utils import RetryMakeDirs, write_yaml from hyperparameter_hunter.utils.general_utils import subdict ################################################## @@ -124,7 +124,8 @@ def __init__(self, file_blacklist=None, extra_recorders=None): an Experiment. The contents of `extra_recorders` are blacklisted in the same way as normal `recorders`. That is, if `file_blacklist` contains the `result_path_key` of a recorder in `extra_recorders`, that recorder is blacklisted""" - # WARNING: Take care if modifying the order/contents of :attr:`recorders`. See :meth:`save_result` documentation for info + # WARNING: Take care if modifying the order/contents of :attr:`recorders` + # See :meth:`save_result` documentation for info self.recorders = [ TestedKeyRecorder, LeaderboardEntryRecorder, @@ -193,7 +194,7 @@ class DescriptionRecorder(BaseRecorder): "cross_experiment_key", "last_evaluation_results", "stat_aggregates", - # 'train_features', + # "train_features", "source_script", "notes", "model_initializer", @@ -204,25 +205,22 @@ class DescriptionRecorder(BaseRecorder): ] def format_result(self): - """Format an OrderedDict containing the Experiment's identifying attributes, results, + """Format a dict containing the Experiment's identifying attributes, results, hyperparameters used, and other stats or information that may be useful""" - self.result = OrderedDict( - [ - ("experiment_id", self.experiment_id), - ("algorithm_name", self.algorithm_name), - ("module_name", self.module_name), - ("hyperparameter_key", self.hyperparameter_key.key), - ("cross_experiment_key", self.cross_experiment_key.key), - ("final_evaluations", self.last_evaluation_results), - ("hyperparameters", self.hyperparameter_key.parameters), - ("cross_experiment_parameters", self.cross_experiment_key.parameters), - ("train_features", None), # TODO: Record the column features in train df - ("platform", node()), - ("source_script", self.source_script), - ("notes", self.notes or ""), - ("aggregates", self.stat_aggregates), - ] - ) + self.result = dict() + self.result["experiment_id"] = self.experiment_id + self.result["algorithm_name"] = self.algorithm_name + self.result["module_name"] = self.module_name + self.result["hyperparameter_key"] = self.hyperparameter_key.key + self.result["cross_experiment_key"] = self.cross_experiment_key.key + self.result["final_evaluations"] = self.last_evaluation_results + self.result["hyperparameters"] = self.hyperparameter_key.parameters + self.result["cross_experiment_parameters"] = self.cross_experiment_key.parameters + self.result["train_features"] = None # TODO: Record the column features in train df + self.result["platform"] = node() + self.result["source_script"] = self.source_script + self.result["notes"] = self.notes or "" + self.result["aggregates"] = self.stat_aggregates #################### Filter Hyperparameters' model_init_params #################### self.result["hyperparameters"]["model_init_params"] = subdict( @@ -230,22 +228,29 @@ def format_result(self): ) def save_result(self): - """Save the Experiment description as a .json file, named after :attr:`experiment_id`. If - :attr:`do_full_save` is a callable and returns False when given the description object, the - result recording loop will be broken, and the remaining result files will not be saved + """Save the Experiment Description as a .yaml/.json file, named after :attr:`experiment_id`. + If :attr:`do_full_save` is a callable and returns False when given the description object, + the result recording loop will be broken, and the remaining result files will not be saved Returns ------- - 'break' - This string will be returned if :attr:`do_full_save` is a callable and returns False - when given the description object. This is the signal for - :class:`recorders.RecorderList` to stop recording result files""" - try: - write_json(f"{self.result_path}/{self.experiment_id}.json", self.result, do_clear=False) - except FileNotFoundError: - make_dirs(self.result_path, exist_ok=False) - write_json(f"{self.result_path}/{self.experiment_id}.json", self.result, do_clear=False) - + "break", or None + "break" is returned if :attr:`do_full_save` is callable and returns False when given the + Description (:attr:`result`). This is the signal for :class:`recorders.RecorderList` to + stop saving files. Otherwise, nothing is returned, continuing the recording process + + See Also + -------- + :attr:`hyperparameter_hunter.settings.G.description_format` + Dictates whether to save Description as a .yaml file (default), or .json""" + if G.description_format == "yaml": + write_yaml(f"{self.result_path}/{self.experiment_id}.yaml", self.result) + elif G.description_format == "json": + write_json(f"{self.result_path}/{self.experiment_id}.json", self.result) + else: + raise ValueError(f"Unexpected `G.description_format`: {G.description_format}") + + #################### Decide Whether to Kill Recorder Loop #################### if (self.do_full_save is not None) and (not self.do_full_save(self.result)): G.warn("Breaking result-saving loop early! Remaining result files will not be saved") return "break" diff --git a/hyperparameter_hunter/i_o/result_reader.py b/hyperparameter_hunter/i_o/result_reader.py index 4f5f5c28..91221016 100644 --- a/hyperparameter_hunter/i_o/result_reader.py +++ b/hyperparameter_hunter/i_o/result_reader.py @@ -1,3 +1,11 @@ +"""This module handles reading and processing saved Experiment result files and determining their +match status to guidelines/search space + +Related +------- +:mod:`hyperparameter_hunter.optimization.protocol_core` + OptPros use :class:`ResultFinder` to identify saved Experiment results that fit within the + current guidelines/search space""" ################################################## # Import Own Assets ################################################## @@ -550,9 +558,8 @@ def find(self): providing an updated "feature_engineer" value for compatible candidates to use. Specifics are documented in :meth:`does_match_feature_engineer`""" for exp_id in self.experiment_ids: - description_path = f"{self.descriptions_dir}/{exp_id}.json" # TODO: Get `description` from `get_scored_params` - Take whatever value `sort` needs - params, score = get_scored_params(description_path, self.target_metric) + params, score = get_scored_params(self.descriptions_dir, exp_id, self.target_metric) #################### Match Init Params #################### self.does_match_init_params_space(exp_id, params["model_init_params"], score) diff --git a/hyperparameter_hunter/optimization/protocol_core.py b/hyperparameter_hunter/optimization/protocol_core.py index 0762f2d6..dfc2c158 100644 --- a/hyperparameter_hunter/optimization/protocol_core.py +++ b/hyperparameter_hunter/optimization/protocol_core.py @@ -8,7 +8,7 @@ Defines the optimization classes that are intended for direct use. All classes defined in :mod:`hyperparameter_hunter.optimization.backends.skopt.protocols` should be descendants of :class:`~hyperparameter_hunter.optimization.protocol_core.BaseOptPro` -:mod:`hyperparameter_hunter.result_reader` +:mod:`hyperparameter_hunter.i_o.result_reader` Used to locate result files for Experiments that are similar to the current optimization constraints, and produce data to learn from in the case of :class:`SKOptPro` :mod:`hyperparameter_hunter.space` diff --git a/hyperparameter_hunter/settings.py b/hyperparameter_hunter/settings.py index 2d415f6a..22bf9a2c 100644 --- a/hyperparameter_hunter/settings.py +++ b/hyperparameter_hunter/settings.py @@ -94,6 +94,9 @@ class G(object): target, which is the same form as the original target data. Continuing the example of label-encoded target data, and an :class:`feature_engineering.EngineerStep` to one-hot encode the target, in this case, label-encoded predictions will be saved. + description_format: {"yaml", "json"}, default="yaml" + How to save Experiment Description files. See + :meth:`hyperparameter_hunter.i_o.recorders.RecorderList.__init__` priority_callbacks: Tuple Intended for internal use only. The contents of this tuple are inserted at the front of an Experiment's list of callback bases via :class:`experiment_core.ExperimentMeta`, ahead of @@ -116,11 +119,13 @@ class G(object): #################### Miscellaneous Settings #################### save_transformed_predictions = False + description_format = "yaml" #################### Internal Settings #################### priority_callbacks = tuple() - #################### Standard Logging Set by :class:`environment.Environment` #################### + #################### Standard Logging #################### + # Set by :class:`environment.Environment` @staticmethod def log(content, *args, **kwargs): """Set in :meth:`environment.Environment.initialize_reporting` to the updated version of @@ -139,7 +144,8 @@ def warn(content, *args, **kwargs): :meth:`reporting.ReportingHandler.warn`""" warnings.warn(content, *args, **kwargs) - #################### Optimization Logging Set by :class:`protocol_core.BaseOptPro` #################### + #################### Optimization Logging #################### + # Set by :class:`protocol_core.BaseOptPro` log_ = print debug_ = print warn_ = warnings.warn diff --git a/hyperparameter_hunter/utils/optimization_utils.py b/hyperparameter_hunter/utils/optimization_utils.py index 5f444c45..abffd52f 100644 --- a/hyperparameter_hunter/utils/optimization_utils.py +++ b/hyperparameter_hunter/utils/optimization_utils.py @@ -13,7 +13,7 @@ from hyperparameter_hunter.keys.hashing import make_hash_sha256 from hyperparameter_hunter.space.dimensions import Real, Integer, Categorical, RejectedOptional from hyperparameter_hunter.utils.boltons_utils import get_path, remap -from hyperparameter_hunter.utils.file_utils import read_json +from hyperparameter_hunter.utils.file_utils import read_json, read_yaml from hyperparameter_hunter.utils.general_utils import extra_enter_attrs try: @@ -26,6 +26,7 @@ ################################################## from contextlib import suppress import pandas as pd +from pathlib import Path ################################################## @@ -88,13 +89,42 @@ def get_ids_by( return matching_ids -def get_scored_params(experiment_description_path, target_metric, get_description=False): +def find_experiment_description(description_dir: str, experiment_id: str) -> dict: + """Locate and return the Description file contents for `experiment_id`. Assumes the Description + file extension to be in {".yaml", ".yml", ".json"}, and checks for files in that order, + returning the first one found + + Parameters + ---------- + description_dir: String + Path to a directory containing the Description files of saved Experiments + experiment_id: String + ID of the saved Experiment whose Description should be returned + + Returns + ------- + Dict + Experiment Description file contents""" + description_path = Path(description_dir) / experiment_id # Extension unknown right now + + for (extension, reader) in [(".yaml", read_yaml), (".yml", read_yaml), (".json", read_json)]: + try: + return reader(description_path.with_suffix(extension)) + except FileNotFoundError: + continue + else: + raise ValueError(f"Expected YAML/JSON `description_path`, not {description_path}") + + +def get_scored_params(description_dir, experiment_id, target_metric, get_description=False): """Retrieve the hyperparameters of a completed Experiment, along with its performance evaluation Parameters ---------- - experiment_description_path: String - The path to an Experiment's description .json file + description_dir: String + Path to a directory containing the Description files of saved Experiments + experiment_id: String + ID of the saved Experiment whose Description should be returned target_metric: Tuple A path denoting the metric to be used. If tuple, the first value should be one of ['oof', 'holdout', 'in_fold'], and the second value should be the name of a metric supplied in @@ -110,7 +140,7 @@ def get_scored_params(experiment_description_path, target_metric, get_descriptio A dict of the hyperparameters used by the Experiment evaluation: Float Value of the Experiment's `target_metric`""" - description = read_json(file_path=experiment_description_path) + description = find_experiment_description(description_dir, experiment_id) evaluation = get_path(description["final_evaluations"], target_metric) all_hyperparameters = description["hyperparameters"] diff --git a/tests/test_space/test_space.py b/tests/test_space/test_space.py index 75667391..a219c028 100644 --- a/tests/test_space/test_space.py +++ b/tests/test_space/test_space.py @@ -2,16 +2,43 @@ # Import Own Assets ################################################## from hyperparameter_hunter import Real, Categorical, Integer -from hyperparameter_hunter.feature_engineering import EngineerStep +from hyperparameter_hunter import Environment, CVExperiment, BayesianOptPro, EngineerStep from hyperparameter_hunter.space.dimensions import RejectedOptional from hyperparameter_hunter.space.space_core import Space +from hyperparameter_hunter.utils.learning_utils import get_iris_data ################################################## # Import Miscellaneous Assets ################################################## +from os import makedirs import pytest +from shutil import rmtree from sys import maxsize +################################################## +# Import Learning Assets +################################################## +from sklearn.ensemble import RandomForestClassifier + +################################################## +# Global Settings +################################################## +assets_dir = "hyperparameter_hunter/__TEST__HyperparameterHunterAssets__" +# assets_dir = "hyperparameter_hunter/HyperparameterHunterAssets" + + +@pytest.fixture(scope="function", autouse=False) +def hh_assets(): + """Construct a temporary HyperparameterHunterAssets directory that exists only for the duration + of the tests contained in each function, before it and its contents are deleted""" + temp_assets_path = assets_dir + try: + makedirs(temp_assets_path) + except FileExistsError: + rmtree(temp_assets_path) + makedirs(temp_assets_path) + yield + ################################################## # `Space.rvs` with `Categorical` Strings @@ -295,3 +322,68 @@ def test_get_by_name_use_location(space, name, expected): ################################################## def test_rejected_optional_repr(): assert "{!r}".format(RejectedOptional()) == "RejectedOptional()" + + +################################################## +# Nested Dimension Optimization Matching Tests +################################################## +# Regression tests to ensure proper Experiment result matching when optimizing `Dimension` s nested +# inside other structures. See https://github.com/HunterMcGushion/hyperparameter_hunter/issues/183 + + +@pytest.fixture() +def env_iris(): + env = Environment( + train_dataset=get_iris_data(), + results_path=assets_dir, + target_column="species", + metrics=["hamming_loss"], + cv_params=dict(n_splits=5, shuffle=True, random_state=32), + ) + return env + + +def get_nested_dict_rfc_opt_pro() -> BayesianOptPro: + """Get a :class:`BayesianOptPro` instance, forged with Dimensions in a nested `class_weight` + dict under `model_init_params`--for `RandomForestClassifier`""" + opt = BayesianOptPro(iterations=2, random_state=32, n_initial_points=1) + opt.forge_experiment( + model_initializer=RandomForestClassifier, + model_init_params=dict( + n_estimators=Integer(5, 100), + # Below `class_weight` is object under test + class_weight={0: Categorical([1, 3]), 1: Categorical([1, 4]), 2: Integer(1, 9)}, + ), + ) + return opt + + +def test_nested_dict_matching_exp(env_iris): + """Test that individual values in a `class_weight` dict can be optimized and matched with + compatible saved Experiment results. See HH issue #183 (linked above) for details""" + # Experiment, whose saved results should be matched by `opt` + exp = CVExperiment( + RandomForestClassifier, dict(n_estimators=10, class_weight={0: 1, 1: 1, 2: 1}) + ) + + # OptPro, whose Dimensions should match with results of `exp` + opt = get_nested_dict_rfc_opt_pro() + opt.go() + + # Check that `opt` matched with `exp` + assert exp.experiment_id in [_[2] for _ in opt.similar_experiments] + + +def test_nested_dict_matching_opt(env_iris): + """Test that individual values in a `class_weight` dict can be optimized and matched with + compatible saved OptPro results. See HH issue #183 (linked above) for details""" + # First OptPro, whose Dimensions should match with below `opt_1` + opt_0 = get_nested_dict_rfc_opt_pro() + opt_0.go() + + # Second OptPro, identical to `opt_0`, whose Dimensions should match with results of `opt_0` + opt_1 = get_nested_dict_rfc_opt_pro() + opt_1.go() + + # Assert `opt_1` matched with all Experiments executed by `opt_0` + assert len(opt_1.similar_experiments) == opt_0.successful_iterations From f4db1ffd921488703e6d1b7a4543dfa8586aafed Mon Sep 17 00:00:00 2001 From: Hunter McGushion Date: Sun, 27 Oct 2019 01:55:20 -0700 Subject: [PATCH 6/8] Remove superfluous for/else --- hyperparameter_hunter/utils/optimization_utils.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/hyperparameter_hunter/utils/optimization_utils.py b/hyperparameter_hunter/utils/optimization_utils.py index abffd52f..4050c4c5 100644 --- a/hyperparameter_hunter/utils/optimization_utils.py +++ b/hyperparameter_hunter/utils/optimization_utils.py @@ -112,8 +112,8 @@ def find_experiment_description(description_dir: str, experiment_id: str) -> dic return reader(description_path.with_suffix(extension)) except FileNotFoundError: continue - else: - raise ValueError(f"Expected YAML/JSON `description_path`, not {description_path}") + + raise ValueError(f"Expected YAML/JSON `description_path`, not {description_path}") def get_scored_params(description_dir, experiment_id, target_metric, get_description=False): From 39d122999feb8e37b0185cfb9a7cf0e96212d66e Mon Sep 17 00:00:00 2001 From: Hunter McGushion Date: Mon, 28 Oct 2019 16:56:31 -0700 Subject: [PATCH 7/8] Remove outdated `YAMLDescriptionRecorder` --- hyperparameter_hunter/i_o/recorders.py | 30 ++++++-------------------- tests/smoke_tests/test_general.py | 14 ++---------- 2 files changed, 8 insertions(+), 36 deletions(-) diff --git a/hyperparameter_hunter/i_o/recorders.py b/hyperparameter_hunter/i_o/recorders.py index 85ae7d33..2b11da9b 100644 --- a/hyperparameter_hunter/i_o/recorders.py +++ b/hyperparameter_hunter/i_o/recorders.py @@ -13,15 +13,18 @@ from hyperparameter_hunter.i_o.exceptions import EnvironmentInactiveError, EnvironmentInvalidError from hyperparameter_hunter.i_o.leaderboards import GlobalLeaderboard from hyperparameter_hunter.settings import G -from hyperparameter_hunter.utils.file_utils import write_json, add_to_json, make_dirs, read_json -from hyperparameter_hunter.utils.file_utils import RetryMakeDirs, write_yaml +from hyperparameter_hunter.utils.file_utils import ( + add_to_json, + RetryMakeDirs, + write_json, + write_yaml, +) from hyperparameter_hunter.utils.general_utils import subdict ################################################## # Import Miscellaneous Assets ################################################## from abc import ABCMeta, abstractmethod -from collections import OrderedDict from platform import node import shutil from sys import exc_info @@ -420,24 +423,3 @@ def format_result(self): def save_result(self): """Save the updated leaderboard file""" self.result.save(path=self.result_paths["unsorted_id_leaderboard"]) - - -class YAMLDescriptionRecorder(BaseRecorder): - result_path_key = "yaml_description" - required_attributes = ["result_paths", "experiment_id"] - - def format_result(self): - pass - - def save_result(self): - from yaml import dump - - self.result = read_json(f"{self.result_paths['description']}/{self.experiment_id}.json") - - make_dirs(self.result_path, exist_ok=True) - with open(f"{self.result_path}/{self.experiment_id}.yml", "w+") as f: - dump(self.result, f, default_flow_style=False, width=200) - - -if __name__ == "__main__": - pass diff --git a/tests/smoke_tests/test_general.py b/tests/smoke_tests/test_general.py index fca2c6c5..c221cba3 100644 --- a/tests/smoke_tests/test_general.py +++ b/tests/smoke_tests/test_general.py @@ -4,10 +4,7 @@ from hyperparameter_hunter import Environment, CVExperiment, Real, Integer, Categorical from hyperparameter_hunter import BayesianOptPro, ExtraTreesOptPro, lambda_callback from hyperparameter_hunter.callbacks.recipes import confusion_matrix_oof, confusion_matrix_holdout -from hyperparameter_hunter.i_o.recorders import ( - YAMLDescriptionRecorder, - UnsortedIDLeaderboardRecorder, -) +from hyperparameter_hunter.i_o.recorders import UnsortedIDLeaderboardRecorder from hyperparameter_hunter.i_o.result_reader import has_experiment_result_file from hyperparameter_hunter.utils.learning_utils import ( get_toy_classification_data, @@ -135,14 +132,7 @@ def env_4(): @pytest.fixture( scope="function", autouse=False, - params=[ - [], - [(UnsortedIDLeaderboardRecorder, "Leaderboards/UnsortedIDLeaderboard.csv")], - [ - (UnsortedIDLeaderboardRecorder, "Leaderboards/UnsortedIDLeaderboard.csv"), - (YAMLDescriptionRecorder, "Experiments/YAMLDescriptions"), - ], - ], + params=[[], [(UnsortedIDLeaderboardRecorder, "Leaderboards/UnsortedIDLeaderboard.csv")]], ) def env_5(request): return Environment( From 44bd1be25dea76b735acc10761b0415f1937af3f Mon Sep 17 00:00:00 2001 From: Hunter McGushion Date: Mon, 28 Oct 2019 18:14:31 -0700 Subject: [PATCH 8/8] Refactor long `forge_experiment` line --- hyperparameter_hunter/optimization/protocol_core.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/hyperparameter_hunter/optimization/protocol_core.py b/hyperparameter_hunter/optimization/protocol_core.py index dfc2c158..457352a3 100644 --- a/hyperparameter_hunter/optimization/protocol_core.py +++ b/hyperparameter_hunter/optimization/protocol_core.py @@ -393,17 +393,17 @@ def forge_experiment( #################### Deal with Keras #################### if self.module_name == "keras": - reusable_build_fn, reusable_wrapper_params, dummy_layers, dummy_compile_params = keras_prep_workflow( + build_fn, wrapper_params, dummy_layers, dummy_compile_params = keras_prep_workflow( self.model_initializer, self.model_init_params["build_fn"], self.model_extra_params, self.source_script, ) - self.model_init_params = dict(build_fn=reusable_build_fn) - self.model_extra_params = reusable_wrapper_params + self.model_init_params = dict(build_fn=build_fn) # Reusable + self.model_extra_params = wrapper_params # Reusable self.dummy_layers = dummy_layers self.dummy_compile_params = dummy_compile_params - # FLAG: Deal with capitalization conflicts when comparing similar experiments: `optimizer`='Adam' vs 'adam' + # FLAG: Handle `optimizer` capitalization conflicts: `optimizer`="Adam" vs "adam" self.set_dimensions()