Tests use Adam optimizer.

james-choncholas · Jan 19, 2025 · 82cc2a7
1 parent f98b3ee
commit 82cc2a7
Show file tree

Hide file tree

Showing 18 changed files with 41 additions and 35 deletions.
diff --git a/tf_shell_ml/conv2d.py b/tf_shell_ml/conv2d.py
@@ -173,9 +173,9 @@ def backward(self, dy, rotation_key=None, sensitivity_analysis_factor=None):
         # ciphertext scheme. Pad them to match the ciphertext scheme.
         if isinstance(dy, tf_shell.ShellTensor64):
             with tf.name_scope("conv_pad"):
-                batch_padding = [[0, dy._context.num_slots - self._layer_input_shape[0]]] + [
-                    [0, 0] for _ in range(len(self._layer_input_shape) - 1)
-                ]
+                batch_padding = [
+                    [0, dy._context.num_slots - self._layer_input_shape[0]]
+                ] + [[0, 0] for _ in range(len(self._layer_input_shape) - 1)]
                 x = tf.pad(x, batch_padding)
 
         if self.activation_deriv is not None:

diff --git a/tf_shell_ml/dense.py b/tf_shell_ml/dense.py
@@ -136,8 +136,8 @@ def backward(self, dy, rotation_key=None, sensitivity_analysis_factor=None):
         # ciphertext scheme when not in eager mode. Pad them to match the
         # ciphertext scheme.
         if isinstance(dy, tf_shell.ShellTensor64):
-            padding = [[0, dy._context.num_slots - self._layer_input_shape [0]]] + [
-                [0, 0] for _ in range(len(self._layer_input_shape ) - 1)
+            padding = [[0, dy._context.num_slots - self._layer_input_shape[0]]] + [
+                [0, 0] for _ in range(len(self._layer_input_shape) - 1)
             ]
             x = tf.pad(x, padding)
 

diff --git a/tf_shell_ml/dpsgd_sequential_model.py b/tf_shell_ml/dpsgd_sequential_model.py
@@ -69,7 +69,9 @@ def _backward(self, dJ_dz, sensitivity_analysis_factor=None):
 
     def compute_grads(self, features, enc_labels):
         scaling_factor = (
-            enc_labels.scaling_factor if hasattr(enc_labels, "scaling_factor") else float("inf")
+            enc_labels.scaling_factor
+            if hasattr(enc_labels, "scaling_factor")
+            else float("inf")
         )
         scaling_factor = tf.cast(scaling_factor, dtype=tf.keras.backend.floatx())
 

diff --git a/tf_shell_ml/model_base.py b/tf_shell_ml/model_base.py
@@ -820,7 +820,9 @@ def shell_train_step(self, features, labels, read_key_from_cache, apply_gradient
                 # than the plaintext modulus.
                 tf.assert_less(
                     max_two_norm,
-                    tf.cast(backprop_context.plaintext_modulus, tf.keras.backend.floatx()),
+                    tf.cast(
+                        backprop_context.plaintext_modulus, tf.keras.backend.floatx()
+                    ),
                     message="Gradient may be too large for the backprop context's plaintext modulus. Reduce the sensitivity by reducing the gradient norms (e.g. reducing the backprop scaling factor), or increase the backprop context's plaintext modulus.",
                 )
 

diff --git a/tf_shell_ml/postscale_sequential_model.py b/tf_shell_ml/postscale_sequential_model.py
@@ -110,7 +110,9 @@ def _backward(self, dJ_dz, jacobians):
 
     def compute_grads(self, features, enc_labels):
         scaling_factor = (
-            enc_labels.scaling_factor if hasattr(enc_labels, "scaling_factor") else float("inf")
+            enc_labels.scaling_factor
+            if hasattr(enc_labels, "scaling_factor")
+            else float("inf")
         )
         scaling_factor = tf.cast(scaling_factor, dtype=tf.keras.backend.floatx())
 

diff --git a/tf_shell_ml/test/conv2d_test.py b/tf_shell_ml/test/conv2d_test.py
@@ -76,7 +76,7 @@ def _test_conv2d_plaintext_forward_backward_correct(
         self.assertAllClose(y, tf_y)
 
         # Next check backward pass.
-        dws, dx = conv_layer.backward(tf.ones_like(y), rotation_key)
+        dws, dx, _ = conv_layer.backward(tf.ones_like(y), rotation_key)
         with tf.GradientTape(persistent=True) as tape:
             tape.watch(im)
             y = tf_conv_layer(im)
@@ -130,13 +130,13 @@ def forward_backward(x):
             enc_dy = tf_shell.to_encrypted(dy, key, context)
 
             # Encrypted backward pass.
-            enc_dw, enc_dx = conv_layer.backward(enc_dy, rotation_key)
+            enc_dw, enc_dx, _ = conv_layer.backward(enc_dy, rotation_key)
             dw = tf_shell.to_tensorflow(enc_dw[0], key)
             # dw = conv_layer.unpack(dw)  # for layer reduction 'fast' or 'galois'
             dx = tf_shell.to_tensorflow(enc_dx, key)
 
             # Plaintext backward pass.
-            pt_dws, pt_dx = conv_layer.backward(dy, None)
+            pt_dws, pt_dx, _ = conv_layer.backward(dy, None)
             pt_dw = pt_dws[0]  # No unpack required for pt.
 
             return dw, dx, dw.shape, dx.shape, pt_dw, pt_dx

diff --git a/tf_shell_ml/test/dpsgd_conv_model_local_test.py b/tf_shell_ml/test/dpsgd_conv_model_local_test.py
@@ -41,7 +41,7 @@ def _test_model(self, disable_encryption, disable_masking, disable_noise, cache)
         labels_dataset = labels_dataset.batch(2**10)
 
         features_dataset = tf.data.Dataset.from_tensor_slices(x_train)
-        features_dataset = features_dataset.batch(2**10)
+        features_dataset = features_dataset.batch(2**12)
 
         val_dataset = tf.data.Dataset.from_tensor_slices((x_test, y_test))
         val_dataset = val_dataset.batch(32)
@@ -103,12 +103,12 @@ def _test_model(self, disable_encryption, disable_masking, disable_noise, cache)
             disable_masking=disable_masking,
             disable_noise=disable_noise,
             cache_path=cache,
-            # check_overflow_INSECURE=True,
+            check_overflow_INSECURE=True,
         )
 
         m.compile(
             loss=tf.keras.losses.CategoricalCrossentropy(),
-            optimizer=tf.keras.optimizers.Adam(0.01),
+            optimizer=tf.keras.optimizers.Adam(0.01, beta_1=0.8),
             metrics=[tf.keras.metrics.CategoricalAccuracy()],
         )
 
@@ -119,7 +119,7 @@ def _test_model(self, disable_encryption, disable_masking, disable_noise, cache)
             features_dataset,
             labels_dataset,
             steps_per_epoch=1,
-            epochs=1,
+            epochs=8,
             verbose=1,
             validation_data=val_dataset,
         )

diff --git a/tf_shell_ml/test/dpsgd_model_distrib_test.py b/tf_shell_ml/test/dpsgd_model_distrib_test.py
@@ -100,7 +100,7 @@ def test_model(self):
                 ],
                 lambda read_from_cache: tf_shell.create_autocontext64(
                     log2_cleartext_sz=23,
-                    scaling_factor=16,
+                    scaling_factor=4,
                     noise_offset_log2=14,
                     read_from_cache=read_from_cache,
                     cache_path=cache,
@@ -119,7 +119,7 @@ def test_model(self):
 
             m.compile(
                 loss=tf.keras.losses.CategoricalCrossentropy(),
-                optimizer=tf.keras.optimizers.Adam(0.05),
+                optimizer=tf.keras.optimizers.Adam(0.1),
                 metrics=[tf.keras.metrics.CategoricalAccuracy()],
             )
 

diff --git a/tf_shell_ml/test/dpsgd_model_float64_test.py b/tf_shell_ml/test/dpsgd_model_float64_test.py
@@ -58,7 +58,7 @@ def _test_model(self, disable_encryption, disable_masking, disable_noise, cache)
             ],
             backprop_context_fn=lambda read_from_cache: tf_shell.create_autocontext64(
                 log2_cleartext_sz=23,
-                scaling_factor=16,
+                scaling_factor=4,
                 noise_offset_log2=14,
                 read_from_cache=read_from_cache,
                 cache_path=cache,
@@ -79,7 +79,7 @@ def _test_model(self, disable_encryption, disable_masking, disable_noise, cache)
 
         m.compile(
             loss=tf.keras.losses.CategoricalCrossentropy(),
-            optimizer=tf.keras.optimizers.Adam(0.05),
+            optimizer=tf.keras.optimizers.Adam(0.1),
             metrics=[tf.keras.metrics.CategoricalAccuracy()],
         )
 

diff --git a/tf_shell_ml/test/dpsgd_model_local_test.py b/tf_shell_ml/test/dpsgd_model_local_test.py
@@ -58,7 +58,7 @@ def _test_model(self, disable_encryption, disable_masking, disable_noise, cache)
             ],
             backprop_context_fn=lambda read_from_cache: tf_shell.create_autocontext64(
                 log2_cleartext_sz=23,
-                scaling_factor=16,
+                scaling_factor=4,
                 noise_offset_log2=14,
                 read_from_cache=read_from_cache,
                 cache_path=cache,
@@ -79,7 +79,7 @@ def _test_model(self, disable_encryption, disable_masking, disable_noise, cache)
 
         m.compile(
             loss=tf.keras.losses.CategoricalCrossentropy(),
-            optimizer=tf.keras.optimizers.Adam(0.05),
+            optimizer=tf.keras.optimizers.Adam(0.1),
             metrics=[tf.keras.metrics.CategoricalAccuracy()],
         )
 

diff --git a/tf_shell_ml/test/dropout_test.py b/tf_shell_ml/test/dropout_test.py
@@ -70,10 +70,10 @@ def _test_dropout_back(self, per_batch):
         notrain_y = dropout_layer.call(x, training=True)
         dy = tf.ones_like(notrain_y)
 
-        dw, dx = dropout_layer.backward(dy, None)
+        dw, dx, _ = dropout_layer.backward(dy, None)
 
         enc_dy = tf_shell.to_encrypted(dy, key, context)
-        enc_dw, enc_dx = dropout_layer.backward(enc_dy, None)
+        enc_dw, enc_dx, _ = dropout_layer.backward(enc_dy, None)
         dec_dx = tf_shell.to_tensorflow(enc_dx, key)
 
         self.assertEmpty(dw)

diff --git a/tf_shell_ml/test/embedding_test.py b/tf_shell_ml/test/embedding_test.py
@@ -83,7 +83,7 @@ def forward_backward(x):
             dy = tf.ones_like(y)
             enc_dy = tf_shell.to_encrypted(dy, key, context)
 
-            enc_dw, _ = embedding_layer.backward(enc_dy, rotation_key)
+            enc_dw, _, _ = embedding_layer.backward(enc_dy, rotation_key)
             dw = tf_shell.to_tensorflow(enc_dw[0], key)
             if reduction == "none":
                 dw = tf.reduce_sum(dw, axis=0)

diff --git a/tf_shell_ml/test/max_pool2d_test.py b/tf_shell_ml/test/max_pool2d_test.py
@@ -66,7 +66,7 @@ def _test_max_pool2d_plaintext_forward_backward_correct(
         self.assertAllClose(y, tf_y)
 
         # Next check backward pass.
-        _, dx = layer.backward(tf.ones_like(y), rotation_key)
+        _, dx, _ = layer.backward(tf.ones_like(y), rotation_key)
         with tf.GradientTape(persistent=True) as tape:
             tape.watch(im)
             y = tf_layer(im)
@@ -107,11 +107,11 @@ def forward_backward(x):
             enc_dy = tf_shell.to_encrypted(dy, key, context)
 
             # Encrypted backward pass.
-            _, enc_dx = layer.backward(enc_dy, rotation_key)
+            _, enc_dx, _ = layer.backward(enc_dy, rotation_key)
             dx = tf_shell.to_tensorflow(enc_dx, key)
 
             # Plaintext backward pass.
-            _, pt_dx = layer.backward(dy, None)
+            _, pt_dx, _ = layer.backward(dy, None)
 
             return dx, dx.shape, pt_dx
 

diff --git a/tf_shell_ml/test/mnist_enc_backprop_test.py b/tf_shell_ml/test/mnist_enc_backprop_test.py
@@ -70,13 +70,13 @@ def train_step(x, y, hidden_layer, output_layer):
     # Backward pass.
     dJ_dy_pred = y.__rsub__(y_pred)  # Derivative of CCE loss and softmax.
 
-    dJ_dw1, dJ_dx1 = output_layer.backward(dJ_dy_pred, rotation_key)
+    dJ_dw1, dJ_dx1, _ = output_layer.backward(dJ_dy_pred, rotation_key)
 
     # Mod reduce will reduce noise but increase the plaintext error.
     # if isinstance(dJ_dx1, tf_shell.ShellTensor64):
     #     dJ_dx1.get_mod_reduced()
 
-    dJ_dw0, dJ_dx0_unused = hidden_layer.backward(dJ_dx1, rotation_key)
+    dJ_dw0, dJ_dx0_unused, _ = hidden_layer.backward(dJ_dx1, rotation_key)
 
     # Only return the weight gradients at [0], not the bias gradients at [1].
     return dJ_dw1[0], dJ_dw0[0]

diff --git a/tf_shell_ml/test/mnist_noenc_backprop_test.py b/tf_shell_ml/test/mnist_noenc_backprop_test.py
@@ -20,7 +20,7 @@
 import tf_shell
 import tf_shell_ml
 
-epochs = 6
+epochs = 2
 batch_size = 2**12
 
 # Prepare the dataset.
@@ -72,9 +72,9 @@ def train_step(x, y):
     # Backward pass.
     dJ_dy_pred = y.__rsub__(y_pred)  # Derivative of CCE loss and softmax.
 
-    dJ_dw1, dJ_dx1 = output_layer.backward(dJ_dy_pred, None)
+    dJ_dw1, dJ_dx1, _ = output_layer.backward(dJ_dy_pred, None)
 
-    dJ_dw0, dJ_dx0_unused = hidden_layer.backward(dJ_dx1, None)
+    dJ_dw0, dJ_dx0_unused, _ = hidden_layer.backward(dJ_dx1, None)
 
     return dJ_dw1, dJ_dw0
 

diff --git a/tf_shell_ml/test/postscale_model_distrib_test.py b/tf_shell_ml/test/postscale_model_distrib_test.py
@@ -115,7 +115,7 @@ def test_model(self):
 
             m.compile(
                 loss=tf.keras.losses.CategoricalCrossentropy(),
-                optimizer=tf.keras.optimizers.Adam(0.05),
+                optimizer=tf.keras.optimizers.Adam(0.1),
                 metrics=[tf.keras.metrics.CategoricalAccuracy()],
             )
 

diff --git a/tf_shell_ml/test/postscale_model_float64_test.py b/tf_shell_ml/test/postscale_model_float64_test.py
@@ -71,7 +71,7 @@ def _test_model(self, disable_encryption, disable_masking, disable_noise, cache)
 
         m.compile(
             loss=tf.keras.losses.CategoricalCrossentropy(),
-            optimizer=tf.keras.optimizers.Adam(0.05),
+            optimizer=tf.keras.optimizers.Adam(0.1),
             metrics=[tf.keras.metrics.CategoricalAccuracy()],
         )
 

diff --git a/tf_shell_ml/test/postscale_model_local_test.py b/tf_shell_ml/test/postscale_model_local_test.py
@@ -71,7 +71,7 @@ def _test_model(self, disable_encryption, disable_masking, disable_noise, cache)
 
         m.compile(
             loss=tf.keras.losses.CategoricalCrossentropy(),
-            optimizer=tf.keras.optimizers.Adam(0.05),
+            optimizer=tf.keras.optimizers.Adam(0.1),
             metrics=[tf.keras.metrics.CategoricalAccuracy()],
         )