[WIP] Bug/class weight matching #205

Open
wants to merge 8 commits into master
9 changes: 9 additions & 0 deletions CHANGELOG.md
@@ -2,6 +2,15 @@
## [Unreleased]

### Features
* Save Experiment Description files in YAML (from JSON)
    * Makes Description files easier for humans to read
    * Fixes issues with certain hyperparameter types ignored by JSON, such as dicts with non-string
      keys, and tuples
    * If you hate the new YAML Descriptions and want to go back to JSON, you can set
      `settings.G.description_format` to "json"
    * Consider opening [an issue](https://github.com/HunterMcGushion/hyperparameter_hunter/issues),
      telling me this was a dumb decision. Feedback is very much appreciated (honestly)
    * Be warned that reverting to "json" means the above-noted issues could occur
* Enabled optimization of tuple values via [`Categorical`](https://hyperparameter-hunter.readthedocs.io/en/stable/source/hyperparameter_hunter.space.html#hyperparameter_hunter.space.dimensions.Categorical)
    * This can be used with Keras to search over different `kernel_size` values for `Conv2D` or
      `pool_size` values for `MaxPooling2D`, for example:
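A minimal illustrative sketch (the network itself and the candidate tuples are hypothetical, not taken from this changelog):

```python
# Hypothetical build_fn for a Keras optimization round: `Categorical` now
# accepts tuple values, so kernel/pool sizes can be searched directly
from hyperparameter_hunter import Categorical
from keras.layers import Conv2D, Dense, Flatten, MaxPooling2D
from keras.models import Sequential


def build_fn(input_shape=(28, 28, 1)):
    model = Sequential([
        Conv2D(
            filters=32,
            kernel_size=Categorical([(1, 1), (3, 3), (5, 5)]),  # tuple search space
            activation="relu",
            input_shape=input_shape,
        ),
        MaxPooling2D(pool_size=Categorical([(2, 2), (3, 3)])),  # tuple search space
        Flatten(),
        Dense(10, activation="softmax"),
    ])
    model.compile(optimizer="adam", loss="categorical_crossentropy")
    return model
```

When such a `build_fn` is handed to a Keras OptPro, each `Categorical` is swapped for a concrete value before the model is actually built.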
5 changes: 0 additions & 5 deletions hyperparameter_hunter/i_o/exceptions.py
@@ -130,11 +130,6 @@ def __init__(self, candidate, template):
super(IncompatibleCandidateError, self).__init__(message)


class ContinueRemap(Exception):
def __str__(self):
return "Just keep doing what you were doing"


##################################################
# Deprecation Warnings
##################################################
101 changes: 44 additions & 57 deletions hyperparameter_hunter/i_o/recorders.py
@@ -13,15 +13,18 @@
from hyperparameter_hunter.i_o.exceptions import EnvironmentInactiveError, EnvironmentInvalidError
from hyperparameter_hunter.i_o.leaderboards import GlobalLeaderboard
from hyperparameter_hunter.settings import G
from hyperparameter_hunter.utils.file_utils import write_json, add_to_json, make_dirs, read_json
from hyperparameter_hunter.utils.file_utils import RetryMakeDirs
from hyperparameter_hunter.utils.file_utils import (
add_to_json,
RetryMakeDirs,
write_json,
write_yaml,
)
from hyperparameter_hunter.utils.general_utils import subdict

##################################################
# Import Miscellaneous Assets
##################################################
from abc import ABCMeta, abstractmethod
from collections import OrderedDict
from platform import node
import shutil
from sys import exc_info
@@ -124,7 +127,8 @@ def __init__(self, file_blacklist=None, extra_recorders=None):
an Experiment. The contents of `extra_recorders` are blacklisted in the same way as
normal `recorders`. That is, if `file_blacklist` contains the `result_path_key` of a
recorder in `extra_recorders`, that recorder is blacklisted"""
# WARNING: Take care if modifying the order/contents of :attr:`recorders`. See :meth:`save_result` documentation for info
# WARNING: Take care if modifying the order/contents of :attr:`recorders`
# See :meth:`save_result` documentation for info
self.recorders = [
TestedKeyRecorder,
LeaderboardEntryRecorder,
@@ -193,7 +197,7 @@ class DescriptionRecorder(BaseRecorder):
"cross_experiment_key",
"last_evaluation_results",
"stat_aggregates",
# 'train_features',
# "train_features",
"source_script",
"notes",
"model_initializer",
@@ -204,48 +208,52 @@
]

def format_result(self):
"""Format an OrderedDict containing the Experiment's identifying attributes, results,
"""Format a dict containing the Experiment's identifying attributes, results,
hyperparameters used, and other stats or information that may be useful"""
self.result = OrderedDict(
[
("experiment_id", self.experiment_id),
("algorithm_name", self.algorithm_name),
("module_name", self.module_name),
("hyperparameter_key", self.hyperparameter_key.key),
("cross_experiment_key", self.cross_experiment_key.key),
("final_evaluations", self.last_evaluation_results),
("hyperparameters", self.hyperparameter_key.parameters),
("cross_experiment_parameters", self.cross_experiment_key.parameters),
("train_features", None), # TODO: Record the column features in train df
("platform", node()),
("source_script", self.source_script),
("notes", self.notes or ""),
("aggregates", self.stat_aggregates),
]
)
self.result = dict()
self.result["experiment_id"] = self.experiment_id
self.result["algorithm_name"] = self.algorithm_name
self.result["module_name"] = self.module_name
self.result["hyperparameter_key"] = self.hyperparameter_key.key
self.result["cross_experiment_key"] = self.cross_experiment_key.key
self.result["final_evaluations"] = self.last_evaluation_results
self.result["hyperparameters"] = self.hyperparameter_key.parameters
self.result["cross_experiment_parameters"] = self.cross_experiment_key.parameters
self.result["train_features"] = None # TODO: Record the column features in train df
self.result["platform"] = node()
self.result["source_script"] = self.source_script
self.result["notes"] = self.notes or ""
self.result["aggregates"] = self.stat_aggregates

#################### Filter Hyperparameters' model_init_params ####################
self.result["hyperparameters"]["model_init_params"] = subdict(
self.result["hyperparameters"]["model_init_params"], drop=["random_state", "seed"]
)

def save_result(self):
"""Save the Experiment description as a .json file, named after :attr:`experiment_id`. If
:attr:`do_full_save` is a callable and returns False when given the description object, the
result recording loop will be broken, and the remaining result files will not be saved
"""Save the Experiment Description as a .yaml/.json file, named after :attr:`experiment_id`.
If :attr:`do_full_save` is a callable and returns False when given the description object,
the result recording loop will be broken, and the remaining result files will not be saved

Returns
-------
'break'
This string will be returned if :attr:`do_full_save` is a callable and returns False
when given the description object. This is the signal for
:class:`recorders.RecorderList` to stop recording result files"""
try:
write_json(f"{self.result_path}/{self.experiment_id}.json", self.result, do_clear=False)
except FileNotFoundError:
make_dirs(self.result_path, exist_ok=False)
write_json(f"{self.result_path}/{self.experiment_id}.json", self.result, do_clear=False)

"break", or None
"break" is returned if :attr:`do_full_save` is callable and returns False when given the
Description (:attr:`result`). This is the signal for :class:`recorders.RecorderList` to
stop saving files. Otherwise, nothing is returned, continuing the recording process

See Also
--------
:attr:`hyperparameter_hunter.settings.G.description_format`
Dictates whether to save Description as a .yaml file (default), or .json"""
if G.description_format == "yaml":
write_yaml(f"{self.result_path}/{self.experiment_id}.yaml", self.result)
elif G.description_format == "json":
write_json(f"{self.result_path}/{self.experiment_id}.json", self.result)
else:
raise ValueError(f"Unexpected `G.description_format`: {G.description_format}")

#################### Decide Whether to Kill Recorder Loop ####################
if (self.do_full_save is not None) and (not self.do_full_save(self.result)):
G.warn("Breaking result-saving loop early! Remaining result files will not be saved")
return "break"
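For context, a hedged sketch of a `do_full_save` callable that exercises the return contract above (the metric nesting under "final_evaluations" is an assumption, not defined in this diff):

```python
# Hypothetical predicate supplied by the user (e.g., via `Environment(do_full_save=...)`).
# `save_result` above calls it with the Description dict built in `format_result`;
# returning False triggers the "break" signal, so `RecorderList` skips the
# remaining result files for this Experiment
def do_full_save(description):
    # Assumed layout of "final_evaluations" -- adjust to the configured metrics
    return description["final_evaluations"]["oof"]["roc_auc_score"] > 0.75
```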
@@ -415,24 +423,3 @@ def format_result(self):
def save_result(self):
"""Save the updated leaderboard file"""
self.result.save(path=self.result_paths["unsorted_id_leaderboard"])


class YAMLDescriptionRecorder(BaseRecorder):
result_path_key = "yaml_description"
required_attributes = ["result_paths", "experiment_id"]

def format_result(self):
pass

def save_result(self):
from yaml import dump

self.result = read_json(f"{self.result_paths['description']}/{self.experiment_id}.json")

make_dirs(self.result_path, exist_ok=True)
with open(f"{self.result_path}/{self.experiment_id}.yml", "w+") as f:
dump(self.result, f, default_flow_style=False, width=200)


if __name__ == "__main__":
pass
43 changes: 25 additions & 18 deletions hyperparameter_hunter/i_o/result_reader.py
@@ -1,3 +1,11 @@
"""This module handles reading and processing saved Experiment result files and determining their
match status to guidelines/search space

Related
-------
:mod:`hyperparameter_hunter.optimization.protocol_core`
OptPros use :class:`ResultFinder` to identify saved Experiment results that fit within the
current guidelines/search space"""
##################################################
# Import Own Assets
##################################################
@@ -550,9 +558,8 @@ def find(self):
providing an updated "feature_engineer" value for compatible candidates to use.
Specifics are documented in :meth:`does_match_feature_engineer`"""
for exp_id in self.experiment_ids:
description_path = f"{self.descriptions_dir}/{exp_id}.json"
# TODO: Get `description` from `get_scored_params` - Take whatever value `sort` needs
params, score = get_scored_params(description_path, self.target_metric)
params, score = get_scored_params(self.descriptions_dir, exp_id, self.target_metric)

#################### Match Init Params ####################
self.does_match_init_params_space(exp_id, params["model_init_params"], score)
@@ -929,14 +936,14 @@ def _visit(path, key, value):
##################################################
# Utilities
##################################################
def has_experiment_result_file(results_dir, experiment_id, result_type=None):
"""Check if the specified result files exist in `results_dir` for Experiment `experiment_id`
def has_experiment_result_file(results_dir, exp_id, result_type=None):
"""Check if the specified result files exist in `results_dir` for Experiment `exp_id`

Parameters
----------
results_dir: String
HyperparameterHunterAssets directory in which to search for Experiment result files
experiment_id: String, or BaseExperiment
exp_id: String, or BaseExperiment
ID of the Experiment whose result files should be searched for in `results_dir`. If not
string, should be an instance of a descendant of
:class:`~hyperparameter_hunter.experiments.BaseExperiment` with an "experiment_id" attribute
@@ -953,8 +960,13 @@ def has_experiment_result_file(results_dir, experiment_id, result_type=None):
-------
Boolean
True if all result files specified by `result_type` exist in `results_dir` for the
Experiment specified by `experiment_id`. Else, False"""
experiment_id = experiment_id if isinstance(experiment_id, str) else experiment_id.experiment_id
Experiment specified by `exp_id`. Else, False"""
exp_id = exp_id if isinstance(exp_id, str) else exp_id.experiment_id

if results_dir.endswith("HyperparameterHunterAssets"):
exp_dir = Path(results_dir) / "Experiments"
else:
exp_dir = Path(results_dir) / "HyperparameterHunterAssets" / "Experiments"

#################### Format `result_type` ####################
if not result_type:
@@ -972,25 +984,20 @@ def has_experiment_result_file(results_dir, experiment_id, result_type=None):
result_type = [result_type]

for subdir in result_type:
#################### Select Result File Suffix ####################
#################### Select Result File Suffixes ####################
if subdir == "Descriptions":
suffix = ".json"
suffixes = (".yaml", ".yml", ".json")
elif subdir == "Heartbeats":
suffix = ".log"
suffixes = (".log",)
elif subdir == "ScriptBackups":
suffix = ".py"
suffixes = (".py",)
elif subdir.startswith("Predictions"):
suffix = ".csv"
suffixes = (".csv",)
else:
raise ValueError(f"Cannot resolve suffix for subdir `result_type`: {subdir}")

#################### Check "Experiments" Directory ####################
if results_dir.endswith("HyperparameterHunterAssets"):
experiments_dir = Path(results_dir) / "Experiments"
else:
experiments_dir = Path(results_dir) / "HyperparameterHunterAssets" / "Experiments"

if not (experiments_dir / subdir / f"{experiment_id}{suffix}").exists():
if not any((exp_dir / subdir / f"{exp_id}{suffix}").exists() for suffix in suffixes):
return False

return True
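For reference, a quick usage sketch of the updated helper (the results directory and Experiment ID are made up):

```python
from hyperparameter_hunter.i_o.result_reader import has_experiment_result_file

# Hypothetical call -- True only if a Description with a .yaml/.yml/.json suffix
# AND a .log Heartbeat both exist for the given Experiment ID
exists = has_experiment_result_file(
    results_dir="HyperparameterHunterAssets",
    exp_id="aaa0bbb1-cccc-dddd-eeee-ffff00001111",
    result_type=["Descriptions", "Heartbeats"],
)
```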
10 changes: 5 additions & 5 deletions hyperparameter_hunter/optimization/protocol_core.py
@@ -8,7 +8,7 @@
Defines the optimization classes that are intended for direct use. All classes defined in
:mod:`hyperparameter_hunter.optimization.backends.skopt.protocols` should be descendants of
:class:`~hyperparameter_hunter.optimization.protocol_core.BaseOptPro`
:mod:`hyperparameter_hunter.result_reader`
:mod:`hyperparameter_hunter.i_o.result_reader`
Used to locate result files for Experiments that are similar to the current optimization
constraints, and produce data to learn from in the case of :class:`SKOptPro`
:mod:`hyperparameter_hunter.space`
@@ -393,17 +393,17 @@ def forge_experiment(

#################### Deal with Keras ####################
if self.module_name == "keras":
reusable_build_fn, reusable_wrapper_params, dummy_layers, dummy_compile_params = keras_prep_workflow(
build_fn, wrapper_params, dummy_layers, dummy_compile_params = keras_prep_workflow(
self.model_initializer,
self.model_init_params["build_fn"],
self.model_extra_params,
self.source_script,
)
self.model_init_params = dict(build_fn=reusable_build_fn)
self.model_extra_params = reusable_wrapper_params
self.model_init_params = dict(build_fn=build_fn) # Reusable
self.model_extra_params = wrapper_params # Reusable
self.dummy_layers = dummy_layers
self.dummy_compile_params = dummy_compile_params
# FLAG: Deal with capitalization conflicts when comparing similar experiments: `optimizer`='Adam' vs 'adam'
# FLAG: Handle `optimizer` capitalization conflicts: `optimizer`="Adam" vs "adam"

self.set_dimensions()

10 changes: 8 additions & 2 deletions hyperparameter_hunter/settings.py
@@ -94,6 +94,9 @@ class G(object):
target, which is the same form as the original target data. Continuing the example of
label-encoded target data with an :class:`feature_engineering.EngineerStep` to one-hot
encode the target, label-encoded predictions will be saved in this case.
description_format: {"yaml", "json"}, default="yaml"
How to save Experiment Description files. See
:meth:`hyperparameter_hunter.i_o.recorders.RecorderList.__init__`
priority_callbacks: Tuple
Intended for internal use only. The contents of this tuple are inserted at the front of an
Experiment's list of callback bases via :class:`experiment_core.ExperimentMeta`, ahead of
@@ -116,11 +119,13 @@

#################### Miscellaneous Settings ####################
save_transformed_predictions = False
description_format = "yaml"

#################### Internal Settings ####################
priority_callbacks = tuple()

#################### Standard Logging Set by :class:`environment.Environment` ####################
#################### Standard Logging ####################
# Set by :class:`environment.Environment`
@staticmethod
def log(content, *args, **kwargs):
"""Set in :meth:`environment.Environment.initialize_reporting` to the updated version of
@@ -139,7 +144,8 @@ def warn(content, *args, **kwargs):
:meth:`reporting.ReportingHandler.warn`"""
warnings.warn(content, *args, **kwargs)

#################### Optimization Logging Set by :class:`protocol_core.BaseOptPro` ####################
#################### Optimization Logging ####################
# Set by :class:`protocol_core.BaseOptPro`
log_ = print
debug_ = print
warn_ = warnings.warn