Add inverse log uniform distribution (#52)

wandb · Dec 7, 2021 · 8a49cda · 8a49cda
1 parent 105e0e6
commit 8a49cda
Show file tree

Hide file tree

Showing 3 changed files with 145 additions and 10 deletions.
diff --git a/src/sweeps/config/schema.json b/src/sweeps/config/schema.json
@@ -286,6 +286,31 @@
       },
       "additionalProperties": false
     },
+    "param_inv_loguniform": {
+      "type": "object",
+      "description": "inverse log uniform distribution",
+      "required": [
+        "distribution",
+        "max",
+        "min"
+      ],
+      "properties": {
+        "min": {
+          "description": "float",
+          "type": "number"
+        },
+        "max": {
+          "description": "float",
+          "type": "number"
+        },
+        "distribution": {
+          "enum": [
+            "inv_log_uniform"
+          ]
+        }
+      },
+      "additionalProperties": false
+    },
     "param_quniform": {
       "type": "object",
       "description": "quantized uniform distribution function.",
@@ -450,6 +475,9 @@
         {
           "$ref": "#/definitions/param_loguniform"
         },
+        {
+          "$ref": "#/definitions/param_inv_loguniform"
+        },
         {
           "$ref": "#/definitions/param_normal"
         },
@@ -538,16 +566,21 @@
       ]
     },
     "controller": {
-	"type": "object",
-	"properties": {
-	    "type": {
-		"type": "string",
-		"description": "Which controller to use, local or cloud",
-		"enum": ["local", "cloud"],
-		"default": "cloud"
-	    }
-	},
-	"required": ["type"]
+      "type": "object",
+      "properties": {
+        "type": {
+          "type": "string",
+          "description": "Which controller to use, local or cloud",
+          "enum": [
+            "local",
+            "cloud"
+          ],
+          "default": "cloud"
+        }
+      },
+      "required": [
+        "type"
+      ]
     },
     "description": {
       "type": "string",

diff --git a/src/sweeps/params.py b/src/sweeps/params.py
@@ -21,6 +21,7 @@ class HyperParameter:
     INT_UNIFORM = "param_int_uniform"
     UNIFORM = "param_uniform"
     LOG_UNIFORM = "param_loguniform"
+    INV_LOG_UNIFORM = "param_inv_loguniform"
     Q_UNIFORM = "param_quniform"
     Q_LOG_UNIFORM = "param_qloguniform"
     NORMAL = "param_normal"
@@ -118,6 +119,12 @@ def cdf(self, x: ArrayLike) -> ArrayLike:
             return stats.uniform.cdf(
                 np.log(x), self.config["min"], self.config["max"] - self.config["min"]
             )
+        elif self.type == HyperParameter.INV_LOG_UNIFORM:
+            return 1 - stats.uniform.cdf(
+                np.log(1 / x),
+                self.config["min"],
+                self.config["max"] - self.config["min"],
+            )
         elif self.type == HyperParameter.NORMAL or self.type == HyperParameter.Q_NORMAL:
             return stats.norm.cdf(x, loc=self.config["mu"], scale=self.config["sigma"])
         elif (
@@ -183,6 +190,14 @@ def ppf(self, x: ArrayLike) -> Any:
                     x, self.config["min"], self.config["max"] - self.config["min"]
                 )
             )
+        elif self.type == HyperParameter.INV_LOG_UNIFORM:
+            return np.exp(
+                -stats.uniform.ppf(
+                    1 - x,
+                    self.config["min"],
+                    self.config["max"] - self.config["min"],
+                )
+            )
         elif self.type == HyperParameter.Q_LOG_UNIFORM:
             r = np.exp(
                 stats.uniform.ppf(

diff --git a/tests/test_random_search.py b/tests/test_random_search.py
@@ -1,6 +1,7 @@
 import pytest
 
 from sweeps.config import SweepConfig
+from sweeps.params import HyperParameter
 import numpy as np
 from sweeps.run import next_run
 from sweeps._types import ArrayLike
@@ -204,6 +205,92 @@ def test_rand_loguniform(plot):
     assert pred_samples.max() <= v2_max
 
 
+def test_rand_inv_loguniform(plot):
+
+    # samples of v2 are between 1e-15 and 1e20
+    v2_min = 1e-15
+    v2_max = 1e20
+
+    # limits for sweep config are in log(1/x) space
+    limit_min = np.log(1 / v2_max)
+    limit_max = np.log(1 / v2_min)
+    n_samples = 20000
+
+    param_config = {
+        "min": limit_min,
+        "max": limit_max,
+        "distribution": "inv_log_uniform",
+    }
+
+    sweep_config_2params = SweepConfig(
+        {
+            "method": "random",
+            "parameters": {
+                "v2": param_config,
+            },
+        }
+    )
+
+    runs = []
+    for i in range(n_samples):
+        suggestion = next_run(sweep_config_2params, runs)
+        runs.append(suggestion)
+
+    pred_samples = np.asarray([run.config["v2"]["value"] for run in runs])
+    true_samples = np.random.uniform(limit_min, limit_max, size=n_samples)
+    true_samples = np.exp(true_samples)
+    true_samples = 1 / true_samples
+
+    # the lower bin edge must be strictly positive because the bins are log-spaced (np.log10 is taken of v2_min)
+    bins = np.logspace(np.log10(v2_min), np.log10(v2_max), 10)
+
+    if plot:
+        plot_two_distributions(true_samples, pred_samples, bins, xscale="log")
+
+    check_that_samples_are_from_the_same_distribution(pred_samples, true_samples, bins)
+
+    assert pred_samples.min() >= v2_min
+    assert pred_samples.max() <= v2_max
+
+    # use more bins to check that the CDF is correct
+    bins = np.logspace(np.log10(v2_min), np.log10(v2_max), 100)
+    n, _ = np.histogram(true_samples, bins=bins)
+    cdf_empirical = np.cumsum(n) / np.sum(n)
+    bin_centers = 0.5 * (bins[1:] + bins[:-1])
+
+    hyperparameter = HyperParameter("inv_log_uniform", param_config)
+    cdf_pred = hyperparameter.cdf(bin_centers)
+
+    if plot:
+        import matplotlib.pyplot as plt
+        import inspect
+
+        fig, ax = plt.subplots()
+        ax.step(
+            bin_centers,
+            cdf_empirical,
+            label="true",
+        )
+        ax.step(
+            bin_centers,
+            cdf_pred,
+            label="pred",
+        )
+        ax.legend()
+        ax.set_xscale("log")
+        ax.tick_params(which="both", axis="both", direction="in")
+        current_test = os.environ.get("PYTEST_CURRENT_TEST")
+        if current_test is None:
+            current_test = inspect.stack()[1].function
+        else:
+            current_test = current_test.split(":")[-1].split(" ")[0]
+        fname = f"{current_test}.cdf.pdf"
+        fig.savefig(test_results_dir / fname)
+
+    # assert that the cdfs are within 0.03 everywhere
+    np.testing.assert_array_less(np.abs(cdf_pred - cdf_empirical), 0.03)
+
+
 @pytest.mark.parametrize("q", [0.1, 1, 10])
 def test_rand_q_lognormal(q, plot):