Skip to content

Commit

Permalink
Add helper for creating subsampled experiment configs
Browse files Browse the repository at this point in the history
  • Loading branch information
achalddave committed Nov 29, 2017
1 parent 093bdab commit da707ad
Show file tree
Hide file tree
Showing 3 changed files with 127 additions and 0 deletions.
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@
*.log
*.pyc
config/*.yaml
config/subsample/*.yaml
!config/subsample/config-subsample-template.yaml
!config/config-predictive-corrective.yaml
!config/config-c3d.yaml
!config/config-resnet.yaml
Expand Down
85 changes: 85 additions & 0 deletions config/subsample/config-subsample-template.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,85 @@
###
# Template for a subsampled-experiment config.
#
# NOTE: This file is not valid YAML on its own: the {{PLACEHOLDER}} tokens
# are filled in by create_subsample_config.py (based on the requested
# subsample rate) to produce a concrete config.
###

###
# General options
###
seed: 0
gpus: [1]

###
# Data options
###
data_paths_config: 'config/local_data_paths.yaml'

# Points to keys in data_paths_config
train_split: 'train_val_split'
val_split: 'test_split'

data_source_class: 'SubsampledLmdbSource'
data_source_options: {
  subsample_rate: {{SUBSAMPLE_RATE}}
}

# Number of labels
num_labels: 65

# Size to crop image to before passing to network.
crop_size: 224

# Mean pixel.
# Mean pixel for train and val set:
pixel_mean: [94.57184865, 100.78170151, 101.76892795]

###
# Training options
###
# Number of total epochs to run.
num_epochs: {{NUM_EPOCHS}}
# Number of batches in epoch.
# # For train
# # Images in train: 459806
# epoch_size: 3593 # ~= 459806 / 64 / 2 (Approximately half of train data)

# For train+val
# Images in train+val: 556241
epoch_size: {{EPOCH_SIZE}} # ~= 556241 / 64 / 2 (Approximately half of train+val data)

# Specify epoch to start at (e.g. if we are continuing to train a model from
# earlier).
init_epoch: 1
# Number of examples in batch.
# NOTE: create_subsample_config.py extracts this value with a regex when
# computing {{EPOCH_SIZE}}; keep the single-line 'batch_size: <int>' format.
batch_size: 64
sampler_class: 'SequentialBatchSampler'
sampler_options: {
}

# If sampling_strategy is 'sequential', this is the length of the backprop
# sequence.
sequence_length: 1
# One of 'sequencer_criterion', 'last_step_criterion', or the empty string. Only
# used if the model for training is nn.Sequencer.
criterion_wrapper: 'sequencer_criterion'

checkpoint_every: 2
evaluate_every: {{EVALUATE_EVERY}}

###
# Optimization options
###
momentum: 0.9
weight_decay: 5.e-4
# {{LR_EPOCH1..5}} are filled in so the learning rate drops by 10x at
# regular intervals over the run.
learning_rates: [
  { start_epoch: {{LR_EPOCH1}}, learning_rate: 1.e-3 },
  { start_epoch: {{LR_EPOCH2}}, learning_rate: 1.e-4 },
  { start_epoch: {{LR_EPOCH3}}, learning_rate: 1.e-5 },
  { start_epoch: {{LR_EPOCH4}}, learning_rate: 1.e-6 },
  { start_epoch: {{LR_EPOCH5}}, learning_rate: 1.e-7 }
]
learning_rate_multipliers: [
  { name: 'nn.Linear', index: 3, weight: 10, bias: 10 }
]

###
# Model options
###
# Torch model to start training with.
model_init: '/data/achald/MultiTHUMOS/models/pretrained/vgg_pretrained_last_layer_updated/vgg16_ilsvrc_2014_pretrained_updated_for_multithumos.t7'
decorate_sequencer: True
40 changes: 40 additions & 0 deletions config/subsample/create_subsample_config.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
"""Create a subsampled experiment config from a template.

Reads a config template containing {{PLACEHOLDER}} tokens (e.g.
config-subsample-template.yaml) and writes a concrete config where the
placeholders are filled in based on the requested subsample rate.
"""

import argparse
import re


def fill_template(template, subsample_rate, num_images):
    """Return `template` with all {{...}} placeholders substituted.

    Args:
        template (str): Contents of the config template. Must contain a
            'batch_size: <int>' line, which is parsed to compute the
            epoch size.
        subsample_rate (int): Rate at which the input data is subsampled.
        num_images (int): Total number of images in the training data.

    Returns:
        str: The template with every placeholder replaced.

    Raises:
        ValueError: If no 'batch_size: <int>' line is found in `template`.
    """
    # We can't use yaml to parse the template, since the template itself is
    # not valid yaml. Instead, hackily parse the config to get the batch
    # size. ([0-9]+ rather than [0-9]*, so a malformed line fails here with
    # a clear error instead of int('') blowing up later.)
    match = re.search(r'batch_size: *([0-9]+) *\n', template)
    if match is None:
        raise ValueError("Template is missing a 'batch_size: <int>' line.")
    batch_size = int(match.group(1))

    substitutions = {
        '{{SUBSAMPLE_RATE}}': subsample_rate,
        '{{NUM_EPOCHS}}': 2 * subsample_rate,
        # Approximately half of the data per epoch.
        '{{EPOCH_SIZE}}':
            int(round(num_images / batch_size / subsample_rate / 2)),
        '{{EVALUATE_EVERY}}': subsample_rate,
    }
    # Reduce learning rate by 10 every 2.5 epochs.
    lr_step = subsample_rate * 5
    for i in range(5):
        substitutions['{{LR_EPOCH%d}}' % (i + 1)] = lr_step * i + 1

    for placeholder, value in substitutions.items():
        template = template.replace(placeholder, str(value))
    return template


def main():
    # Use first line of file docstring as description if a file docstring
    # exists.
    parser = argparse.ArgumentParser(
        description=__doc__.split('\n')[0] if __doc__ else '',
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('--subsample_rate', required=True, type=int)
    parser.add_argument('--config_output', required=True)
    parser.add_argument('--config_template',
                        default='config-subsample-template.yaml')
    # Default is number of images in train+val set.
    parser.add_argument('--num_images', default=556241, type=int)
    args = parser.parse_args()

    with open(args.config_template, 'r') as f:
        template = f.read()

    filled = fill_template(template, args.subsample_rate, args.num_images)

    with open(args.config_output, 'w') as f:
        f.write(filled)


if __name__ == "__main__":
    main()

0 comments on commit da707ad

Please sign in to comment.