[WIP] Bug/class weight matching #205

Open
wants to merge 8 commits into master
9 changes: 9 additions & 0 deletions CHANGELOG.md
@@ -2,6 +2,15 @@
## [Unreleased]

### Features
* Save Experiment Description files in YAML (from JSON)
    * Makes Description files easier for humans to read
    * Fixes issues with certain hyperparameter types ignored by JSON, such as dicts with non-string
      keys, and tuples
    * If you hate the new YAML Descriptions and want to go back to JSON, you can set
      `settings.G.description_format` to "json"
    * Consider opening [an issue](https://github.com/HunterMcGushion/hyperparameter_hunter/issues),
      telling me this was a dumb decision. Feedback is very much appreciated (honestly)
    * Be warned that reverting to "json" means the above-noted issues could occur
* Enabled optimization of tuple values via [`Categorical`](https://hyperparameter-hunter.readthedocs.io/en/stable/source/hyperparameter_hunter.space.html#hyperparameter_hunter.space.dimensions.Categorical)
    * This can be used with Keras to search over different `kernel_size` values for `Conv2D` or
      `pool_size` values for `MaxPooling2D`, for example:
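A minimal illustrative sketch (the network itself and the candidate tuples are hypothetical, not taken from this changelog):

```python
# Hypothetical build_fn for a Keras optimization round: `Categorical` now
# accepts tuple values, so kernel/pool sizes can be searched directly
from hyperparameter_hunter import Categorical
from keras.layers import Conv2D, Dense, Flatten, MaxPooling2D
from keras.models import Sequential


def build_fn(input_shape=(28, 28, 1)):
    model = Sequential([
        Conv2D(
            filters=32,
            kernel_size=Categorical([(1, 1), (3, 3), (5, 5)]),  # tuple search space
            activation="relu",
            input_shape=input_shape,
        ),
        MaxPooling2D(pool_size=Categorical([(2, 2), (3, 3)])),  # tuple search space
        Flatten(),
        Dense(10, activation="softmax"),
    ])
    model.compile(optimizer="adam", loss="categorical_crossentropy")
    return model
```

When such a `build_fn` is handed to a Keras OptPro, each `Categorical` is swapped for a concrete value before the model is actually built.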
5 changes: 0 additions & 5 deletions hyperparameter_hunter/i_o/exceptions.py
@@ -130,11 +130,6 @@ def __init__(self, candidate, template):
super(IncompatibleCandidateError, self).__init__(message)


class ContinueRemap(Exception):
def __str__(self):
return "Just keep doing what you were doing"


##################################################
# Deprecation Warnings
##################################################
101 changes: 44 additions & 57 deletions hyperparameter_hunter/i_o/recorders.py
@@ -13,15 +13,18 @@
from hyperparameter_hunter.i_o.exceptions import EnvironmentInactiveError, EnvironmentInvalidError
from hyperparameter_hunter.i_o.leaderboards import GlobalLeaderboard
from hyperparameter_hunter.settings import G
from hyperparameter_hunter.utils.file_utils import write_json, add_to_json, make_dirs, read_json
from hyperparameter_hunter.utils.file_utils import RetryMakeDirs
from hyperparameter_hunter.utils.file_utils import (
add_to_json,
RetryMakeDirs,
write_json,
write_yaml,
)
from hyperparameter_hunter.utils.general_utils import subdict

##################################################
# Import Miscellaneous Assets
##################################################
from abc import ABCMeta, abstractmethod
from collections import OrderedDict
from platform import node
import shutil
from sys import exc_info
@@ -124,7 +127,8 @@ def __init__(self, file_blacklist=None, extra_recorders=None):
an Experiment. The contents of `extra_recorders` are blacklisted in the same way as
normal `recorders`. That is, if `file_blacklist` contains the `result_path_key` of a
recorder in `extra_recorders`, that recorder is blacklisted"""
# WARNING: Take care if modifying the order/contents of :attr:`recorders`. See :meth:`save_result` documentation for info
# WARNING: Take care if modifying the order/contents of :attr:`recorders`
# See :meth:`save_result` documentation for info
self.recorders = [
TestedKeyRecorder,
LeaderboardEntryRecorder,
@@ -193,7 +197,7 @@ class DescriptionRecorder(BaseRecorder):
"cross_experiment_key",
"last_evaluation_results",
"stat_aggregates",
# 'train_features',
# "train_features",
"source_script",
"notes",
"model_initializer",
@@ -204,48 +208,52 @@
]

def format_result(self):
"""Format an OrderedDict containing the Experiment's identifying attributes, results,
"""Format a dict containing the Experiment's identifying attributes, results,
hyperparameters used, and other stats or information that may be useful"""
self.result = OrderedDict(
[
("experiment_id", self.experiment_id),
("algorithm_name", self.algorithm_name),
("module_name", self.module_name),
("hyperparameter_key", self.hyperparameter_key.key),
("cross_experiment_key", self.cross_experiment_key.key),
("final_evaluations", self.last_evaluation_results),
("hyperparameters", self.hyperparameter_key.parameters),
("cross_experiment_parameters", self.cross_experiment_key.parameters),
("train_features", None), # TODO: Record the column features in train df
("platform", node()),
("source_script", self.source_script),
("notes", self.notes or ""),
("aggregates", self.stat_aggregates),
]
)
self.result = dict()
self.result["experiment_id"] = self.experiment_id
self.result["algorithm_name"] = self.algorithm_name
self.result["module_name"] = self.module_name
self.result["hyperparameter_key"] = self.hyperparameter_key.key
self.result["cross_experiment_key"] = self.cross_experiment_key.key
self.result["final_evaluations"] = self.last_evaluation_results
self.result["hyperparameters"] = self.hyperparameter_key.parameters
self.result["cross_experiment_parameters"] = self.cross_experiment_key.parameters
self.result["train_features"] = None # TODO: Record the column features in train df
self.result["platform"] = node()
self.result["source_script"] = self.source_script
self.result["notes"] = self.notes or ""
self.result["aggregates"] = self.stat_aggregates

#################### Filter Hyperparameters' model_init_params ####################
self.result["hyperparameters"]["model_init_params"] = subdict(
self.result["hyperparameters"]["model_init_params"], drop=["random_state", "seed"]
)

def save_result(self):
"""Save the Experiment description as a .json file, named after :attr:`experiment_id`. If
:attr:`do_full_save` is a callable and returns False when given the description object, the
result recording loop will be broken, and the remaining result files will not be saved
"""Save the Experiment Description as a .yaml/.json file, named after :attr:`experiment_id`.
If :attr:`do_full_save` is a callable and returns False when given the description object,
the result recording loop will be broken, and the remaining result files will not be saved

Returns
-------
'break'
This string will be returned if :attr:`do_full_save` is a callable and returns False
when given the description object. This is the signal for
:class:`recorders.RecorderList` to stop recording result files"""
try:
write_json(f"{self.result_path}/{self.experiment_id}.json", self.result, do_clear=False)
except FileNotFoundError:
make_dirs(self.result_path, exist_ok=False)
write_json(f"{self.result_path}/{self.experiment_id}.json", self.result, do_clear=False)

"break", or None
"break" is returned if :attr:`do_full_save` is callable and returns False when given the
Description (:attr:`result`). This is the signal for :class:`recorders.RecorderList` to
stop saving files. Otherwise, nothing is returned, continuing the recording process

See Also
--------
:attr:`hyperparameter_hunter.settings.G.description_format`
Dictates whether to save Description as a .yaml file (default), or .json"""
if G.description_format == "yaml":
write_yaml(f"{self.result_path}/{self.experiment_id}.yaml", self.result)
elif G.description_format == "json":
write_json(f"{self.result_path}/{self.experiment_id}.json", self.result)
else:
raise ValueError(f"Unexpected `G.description_format`: {G.description_format}")

#################### Decide Whether to Kill Recorder Loop ####################
if (self.do_full_save is not None) and (not self.do_full_save(self.result)):
G.warn("Breaking result-saving loop early! Remaining result files will not be saved")
return "break"
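For context, a hedged sketch of a `do_full_save` callable that exercises the return contract above (the metric nesting under "final_evaluations" is an assumption, not defined in this diff):

```python
# Hypothetical predicate supplied by the user (e.g., via `Environment(do_full_save=...)`).
# `save_result` above calls it with the Description dict built in `format_result`;
# returning False triggers the "break" signal, so `RecorderList` skips the
# remaining result files for this Experiment
def do_full_save(description):
    # Assumed layout of "final_evaluations" -- adjust to the configured metrics
    return description["final_evaluations"]["oof"]["roc_auc_score"] > 0.75
```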
@@ -415,24 +423,3 @@ def format_result(self):
def save_result(self):
"""Save the updated leaderboard file"""
self.result.save(path=self.result_paths["unsorted_id_leaderboard"])


class YAMLDescriptionRecorder(BaseRecorder):
result_path_key = "yaml_description"
required_attributes = ["result_paths", "experiment_id"]

def format_result(self):
pass

def save_result(self):
from yaml import dump

self.result = read_json(f"{self.result_paths['description']}/{self.experiment_id}.json")

make_dirs(self.result_path, exist_ok=True)
with open(f"{self.result_path}/{self.experiment_id}.yml", "w+") as f:
dump(self.result, f, default_flow_style=False, width=200)


if __name__ == "__main__":
pass
43 changes: 25 additions & 18 deletions hyperparameter_hunter/i_o/result_reader.py
@@ -1,3 +1,11 @@
"""This module handles reading and processing saved Experiment result files and determining their
match status to guidelines/search space

Related
-------
:mod:`hyperparameter_hunter.optimization.protocol_core`
OptPros use :class:`ResultFinder` to identify saved Experiment results that fit within the
current guidelines/search space"""
##################################################
# Import Own Assets
##################################################
@@ -550,9 +558,8 @@ def find(self):
providing an updated "feature_engineer" value for compatible candidates to use.
Specifics are documented in :meth:`does_match_feature_engineer`"""
for exp_id in self.experiment_ids:
description_path = f"{self.descriptions_dir}/{exp_id}.json"
# TODO: Get `description` from `get_scored_params` - Take whatever value `sort` needs
params, score = get_scored_params(description_path, self.target_metric)
params, score = get_scored_params(self.descriptions_dir, exp_id, self.target_metric)

#################### Match Init Params ####################
self.does_match_init_params_space(exp_id, params["model_init_params"], score)
@@ -929,14 +936,14 @@ def _visit(path, key, value):
##################################################
# Utilities
##################################################
def has_experiment_result_file(results_dir, experiment_id, result_type=None):
"""Check if the specified result files exist in `results_dir` for Experiment `experiment_id`
def has_experiment_result_file(results_dir, exp_id, result_type=None):
"""Check if the specified result files exist in `results_dir` for Experiment `exp_id`

Parameters
----------
results_dir: String
HyperparameterHunterAssets directory in which to search for Experiment result files
experiment_id: String, or BaseExperiment
exp_id: String, or BaseExperiment
ID of the Experiment whose result files should be searched for in `results_dir`. If not
string, should be an instance of a descendant of
:class:`~hyperparameter_hunter.experiments.BaseExperiment` with an "experiment_id" attribute
@@ -953,8 +960,13 @@ def has_experiment_result_file(results_dir, experiment_id, result_type=None):
-------
Boolean
True if all result files specified by `result_type` exist in `results_dir` for the
Experiment specified by `experiment_id`. Else, False"""
experiment_id = experiment_id if isinstance(experiment_id, str) else experiment_id.experiment_id
Experiment specified by `exp_id`. Else, False"""
exp_id = exp_id if isinstance(exp_id, str) else exp_id.experiment_id

if results_dir.endswith("HyperparameterHunterAssets"):
exp_dir = Path(results_dir) / "Experiments"
else:
exp_dir = Path(results_dir) / "HyperparameterHunterAssets" / "Experiments"

#################### Format `result_type` ####################
if not result_type:
@@ -972,25 +984,20 @@ def has_experiment_result_file(results_dir, experiment_id, result_type=None):
result_type = [result_type]

for subdir in result_type:
#################### Select Result File Suffix ####################
#################### Select Result File Suffixes ####################
if subdir == "Descriptions":
suffix = ".json"
suffixes = (".yaml", ".yml", ".json")
elif subdir == "Heartbeats":
suffix = ".log"
suffixes = (".log",)
elif subdir == "ScriptBackups":
suffix = ".py"
suffixes = (".py",)
elif subdir.startswith("Predictions"):
suffix = ".csv"
suffixes = (".csv",)
else:
raise ValueError(f"Cannot resolve suffix for subdir `result_type`: {subdir}")

#################### Check "Experiments" Directory ####################
if results_dir.endswith("HyperparameterHunterAssets"):
experiments_dir = Path(results_dir) / "Experiments"
else:
experiments_dir = Path(results_dir) / "HyperparameterHunterAssets" / "Experiments"

if not (experiments_dir / subdir / f"{experiment_id}{suffix}").exists():
if not any((exp_dir / subdir / f"{exp_id}{suffix}").exists() for suffix in suffixes):
return False

return True
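For reference, a quick usage sketch of the updated helper (the results directory and Experiment ID are made up):

```python
from hyperparameter_hunter.i_o.result_reader import has_experiment_result_file

# Hypothetical call -- True only if a Description with a .yaml/.yml/.json suffix
# AND a .log Heartbeat both exist for the given Experiment ID
exists = has_experiment_result_file(
    results_dir="HyperparameterHunterAssets",
    exp_id="aaa0bbb1-cccc-dddd-eeee-ffff00001111",
    result_type=["Descriptions", "Heartbeats"],
)
```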
10 changes: 5 additions & 5 deletions hyperparameter_hunter/optimization/protocol_core.py
@@ -8,7 +8,7 @@
Defines the optimization classes that are intended for direct use. All classes defined in
:mod:`hyperparameter_hunter.optimization.backends.skopt.protocols` should be descendants of
:class:`~hyperparameter_hunter.optimization.protocol_core.BaseOptPro`
:mod:`hyperparameter_hunter.result_reader`
:mod:`hyperparameter_hunter.i_o.result_reader`
Used to locate result files for Experiments that are similar to the current optimization
constraints, and produce data to learn from in the case of :class:`SKOptPro`
:mod:`hyperparameter_hunter.space`
@@ -393,17 +393,17 @@ def forge_experiment(

#################### Deal with Keras ####################
if self.module_name == "keras":
reusable_build_fn, reusable_wrapper_params, dummy_layers, dummy_compile_params = keras_prep_workflow(
build_fn, wrapper_params, dummy_layers, dummy_compile_params = keras_prep_workflow(
self.model_initializer,
self.model_init_params["build_fn"],
self.model_extra_params,
self.source_script,
)
self.model_init_params = dict(build_fn=reusable_build_fn)
self.model_extra_params = reusable_wrapper_params
self.model_init_params = dict(build_fn=build_fn) # Reusable
self.model_extra_params = wrapper_params # Reusable
self.dummy_layers = dummy_layers
self.dummy_compile_params = dummy_compile_params
# FLAG: Deal with capitalization conflicts when comparing similar experiments: `optimizer`='Adam' vs 'adam'
# FLAG: Handle `optimizer` capitalization conflicts: `optimizer`="Adam" vs "adam"

self.set_dimensions()

10 changes: 8 additions & 2 deletions hyperparameter_hunter/settings.py
@@ -94,6 +94,9 @@ class G(object):
target, which is the same form as the original target data. Continuing the example of
label-encoded target data with an :class:`feature_engineering.EngineerStep` to one-hot
encode the target, label-encoded predictions will be saved in this case.
description_format: {"yaml", "json"}, default="yaml"
How to save Experiment Description files. See
:meth:`hyperparameter_hunter.i_o.recorders.RecorderList.__init__`
priority_callbacks: Tuple
Intended for internal use only. The contents of this tuple are inserted at the front of an
Experiment's list of callback bases via :class:`experiment_core.ExperimentMeta`, ahead of
@@ -116,11 +119,13 @@

#################### Miscellaneous Settings ####################
save_transformed_predictions = False
description_format = "yaml"

#################### Internal Settings ####################
priority_callbacks = tuple()

#################### Standard Logging Set by :class:`environment.Environment` ####################
#################### Standard Logging ####################
# Set by :class:`environment.Environment`
@staticmethod
def log(content, *args, **kwargs):
"""Set in :meth:`environment.Environment.initialize_reporting` to the updated version of
@@ -139,7 +144,8 @@ def warn(content, *args, **kwargs):
:meth:`reporting.ReportingHandler.warn`"""
warnings.warn(content, *args, **kwargs)

#################### Optimization Logging Set by :class:`protocol_core.BaseOptPro` ####################
#################### Optimization Logging ####################
# Set by :class:`protocol_core.BaseOptPro`
log_ = print
debug_ = print
warn_ = warnings.warn