diff --git a/forge/test/benchmark/__init__.py b/forge/test/benchmark/__init__.py
new file mode 100644
index 000000000..c682f53a4
--- /dev/null
+++ b/forge/test/benchmark/__init__.py
@@ -0,0 +1 @@
+from .benchmark import models
\ No newline at end of file
diff --git a/forge/test/benchmark/benchmark.py b/forge/test/benchmark/benchmark.py
new file mode 100644
index 000000000..a81d03e88
--- /dev/null
+++ b/forge/test/benchmark/benchmark.py
@@ -0,0 +1,125 @@
+
+import argparse
+
+from benchmark import models
+
+
+MODELS = {
+    'mnist_linear': models.mnist_linear.test_mnist_linear_benchmark,
+}
+
+
+def read_args():
+    """
+    Read the arguments from the command line.
+
+    Parameters:
+    ----------
+    None
+
+    Returns:
+    -------
+    parsed_args: dict
+        The parsed arguments from the command line.
+    """
+
+    # Create the argument parser
+    parser = argparse.ArgumentParser(description='Benchmark a model on TT hardware')
+    parser.add_argument('-m', '--model', help='Model to benchmark (e.g. bert, mnist_linear).')
+    parser.add_argument('-c', '--config', default=None, help='Model configuration to benchmark (e.g. tiny, base, large).')
+    parser.add_argument('-t', '--training', action='store_true', default=False, help='Benchmark training.')
+    parser.add_argument('-bs', '--batch_size', type=int, default=1, help='Batch size, number of samples to process at once.')
+    parser.add_argument('-isz', '--input_size', type=int, default=None, help='Input size, size of the input sample, for models that support changing it.')
+    parser.add_argument('-hs', '--hidden_size', type=int, default=None, help='Hidden size, size of the hidden layer, for models that support changing it.')
+    parser.add_argument('-o', '--output', help='Output JSON file to write results to. If the file already exists, it will be overwritten.')
+
+    args = parser.parse_args()
+
+    # Initialize the parsed arguments
+    parsed_args = {}
+
+    if not args.model:
+        print("\nModel must be specified.\n\n")
+        parser.print_help()
+        exit(1)
+
+    if args.model not in MODELS:
+        print("Invalid model name. Available models: ")
+        print(list(MODELS.keys()))
+        exit(1)
+
+    parsed_args['model'] = args.model
+    parsed_args['config'] = args.config
+    parsed_args['training'] = args.training
+
+    if not args.batch_size:
+        print("\nBatch size not specified, defaulting to 1.\n\n")
+        parsed_args['batch_size'] = 1
+    else:
+        parsed_args['batch_size'] = args.batch_size
+
+
+    parsed_args['input_size'] = args.input_size
+    parsed_args['hidden_size'] = args.hidden_size
+
+    if not args.output:
+        print("\nOutput file is not specified.\n\n")
+        parser.print_help()
+        exit(1)
+
+    parsed_args['output'] = args.output
+
+    return parsed_args
+
+
+def run_benchmark(config: dict):
+    """
+    Run the benchmark test for the given model configuration.
+
+    Parameters:
+    ----------
+    config: dict
+        The configuration of the model.
+
+    Returns:
+    -------
+    None
+    """
+
+    model_func = MODELS[config['model']]
+    model_func(config)
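+
+
+# For reference, read_args() returns a dict shaped like this (values are illustrative):
+#
+#   {
+#       'model': 'mnist_linear', 'config': None, 'training': False,
+#       'batch_size': 1, 'input_size': None, 'hidden_size': None,
+#       'output': 'forge-benchmark-e2e-mnist.json',
+#   }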
+
+
+def main():
+    """
+    Main function for running the benchmark tests.
+
+    Parameters:
+    ----------
+    None
+
+    Returns:
+    -------
+    None
+    """
+
+    print("Read the arguments from the command line.")
+    config = read_args()
+
+    print("Run the benchmark test for the given model configuration.")
+    run_benchmark(config)
+
+    print("Done.")
+
+
+if __name__ == '__main__':
+    main()
\ No newline at end of file
diff --git a/forge/test/benchmark/benchmark/__init__.py b/forge/test/benchmark/benchmark/__init__.py
new file mode 100644
index 000000000..6444108ec
--- /dev/null
+++ b/forge/test/benchmark/benchmark/__init__.py
@@ -0,0 +1 @@
+from .models import mnist_linear
\ No newline at end of file
diff --git a/forge/test/benchmark/benchmark/models/__init__.py b/forge/test/benchmark/benchmark/models/__init__.py
new file mode 100644
index 000000000..b3a2c416a
--- /dev/null
+++ b/forge/test/benchmark/benchmark/models/__init__.py
@@ -0,0 +1 @@
+from .mnist_linear import test_mnist_linear_benchmark
\ No newline at end of file
diff --git a/forge/test/benchmark/benchmark/models/mnist_linear.py b/forge/test/benchmark/benchmark/models/mnist_linear.py
index 4641e4bb1..d8639dbfe 100644
--- a/forge/test/benchmark/benchmark/models/mnist_linear.py
+++ b/forge/test/benchmark/benchmark/models/mnist_linear.py
@@ -13,12 +13,36 @@
 from forge.op.eval.common import compare_with_golden_pcc
 
-
+# Batch size configurations
 MNIST_BATCH_SIZE_EXP_RANGE = 9
 
-MNIST_INPUT_FEATURE_SIZE = 784
-MNIST_OUTPUT_FEATURE_SIZE = 10
-BATCH_SIZE = [2 ** i for i in range(MNIST_BATCH_SIZE_EXP_RANGE)]
+# Input size configurations
+MNIST_INPUT_SIZE_EXP_RANGE = [5, 7]
+MNIST_INPUT_SIZE_FACTORS = [1, 3, 5, 7]
+
+# Hidden layer size configurations
+MNIST_HIDDEN_SIZE_EXP_RANGE = [5, 9]
+MNIST_HIDDEN_SIZE_FACTORS = [1, 3]
+
+MNIST_INPUT_FEATURE_SIZE = 784  # 784 = 28 * 28, the default flattened size of an MNIST image
+MNIST_OUTPUT_FEATURE_SIZE = 10  # 10 classes in MNIST, the default output size
+MNIST_HIDDEN_SIZE = 256  # Default hidden layer size
+
+BATCH_SIZE = [2 ** i for i in range(MNIST_BATCH_SIZE_EXP_RANGE)]  # Batch sizes: 1, 2, 4, 8, ..., 256
+INPUT_SIZE = [  # Input sizes: each factor times each power of two, e.g. 1 * 2^5 = 32, 1 * 2^6 = 64, 3 * 2^5 = 96, etc.
+    factor * base
+    for factor in MNIST_INPUT_SIZE_FACTORS
+    for base in [2 ** i for i in range(MNIST_INPUT_SIZE_EXP_RANGE[0], MNIST_INPUT_SIZE_EXP_RANGE[1])]
+]
+HIDDEN_SIZE = [  # Hidden layer sizes, built the same way: 1 * 2^5 = 32, 1 * 2^6 = 64, ..., 3 * 2^5 = 96, etc.
+    factor * base
+    for factor in MNIST_HIDDEN_SIZE_FACTORS
+    for base in [2 ** i for i in range(MNIST_HIDDEN_SIZE_EXP_RANGE[0], MNIST_HIDDEN_SIZE_EXP_RANGE[1])]
+]
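+
+# For reference, the sweeps above expand to:
+#   INPUT_SIZE  = [32, 64, 96, 192, 160, 320, 224, 448]
+#   HIDDEN_SIZE = [32, 64, 128, 256, 96, 192, 384, 768]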
 
 ARCH = []
 DATAFORMAT = []
 MATH_FIDELITY = []
@@ -31,7 +55,7 @@
     def __init__(
         self,
         input_size=MNIST_INPUT_FEATURE_SIZE,
         output_size=MNIST_OUTPUT_FEATURE_SIZE,
-        hidden_size=256
+        hidden_size=MNIST_HIDDEN_SIZE
     ):
         super(MNISTLinear, self).__init__()
@@ -50,15 +74,22 @@ def forward(self, x):
 
 # @pytest.mark.parametrize()
 # @pytest.mark.parametrize()
+
+# TODO: For now we skip these parameters because they are not yet supported
 # @pytest.mark.parametrize("math_fidelity", MATH_FIDELITY, ids=[f"math_fidelity={item}" for item in MATH_FIDELITY])
 # @pytest.mark.parametrize("dataformat", DATAFORMAT, ids=[f"dataformat={item}" for item in DATAFORMAT])
 # @pytest.mark.parametrize("arch", ARCH, ids=[f"arch={item}" for item in ARCH])
+@pytest.mark.parametrize("hidden_size", HIDDEN_SIZE, ids=[f"hidden_size={item}" for item in HIDDEN_SIZE])
+@pytest.mark.parametrize("input_size", INPUT_SIZE, ids=[f"input_size={item}" for item in INPUT_SIZE])
 @pytest.mark.parametrize("batch_size", BATCH_SIZE, ids=[f"batch_size={item}" for item in BATCH_SIZE])
-def test_mnist_inference(
+def test_mnist_linear(
     training,
     batch_size,
+    input_size,
+    hidden_size,
     # arch,
     # dataformat,
+    # math_fidelity,
 ):
 
     if training:
@@ -67,13 +98,13 @@
 
     if batch_size > 1:
         pytest.skip("Batch size greater than 1 not supported")
 
-    inputs = [torch.rand(batch_size, MNIST_INPUT_FEATURE_SIZE)]
+    inputs = [torch.rand(batch_size, input_size)]
 
-    framework_model = MNISTLinear()
+    framework_model = MNISTLinear(input_size=input_size, hidden_size=hidden_size)
     fw_out = framework_model(*inputs)
 
-    start = time.time()
     compiled_model = forge.compile(framework_model, sample_inputs=inputs)
+    start = time.time()  # Start timing after compilation, so compile time is excluded from the measurement
     co_out = compiled_model(*inputs)
     end = time.time()
@@ -88,26 +119,24 @@
     samples_per_sec = batch_size / total_time
     model_name = "MNIST Linear"
 
-    print("=====================================================")
-    print("| MNIST Benchmark Results: |")
-    print("-----------------------------------------------------")
-    print(f"Model: {model_name}")
-    print(f"Date: {date}")
-    print(f"Machine Name: {machine_name}")
-    print(f"Total execution time: : {total_time}")
-    print(f"Total samples: {batch_size}")
-    print(f"Sample per second: {samples_per_sec}")
-    print(f"Batch size {batch_size}")
-    print(f"")
-    print(f"")
-    print(f"")
-    print("=====================================================")
-
-    output_file = "forge-benchmark-e2e-mnist.json"
+    print("====================================================================")
+    print("| MNIST Benchmark Results: |")
+    print("--------------------------------------------------------------------")
+    print(f"| Model: {model_name}")
+    print(f"| Date: {date}")
+    print(f"| Machine name: {machine_name}")
+    print(f"| Total execution time: {total_time}")
+    print(f"| Total samples: {batch_size}")
+    print(f"| Samples per second: {samples_per_sec}")
+    print(f"| Batch size: {batch_size}")
+    print(f"| Input size: {input_size}")
+    print(f"| Hidden size: {hidden_size}")
+    print("====================================================================")
 
     # Create a dictionary to store the results and the configuration
     result = {
         "model": model_name,
+        "config": "",
         "date": date,
         "hash": short_hash,
         "machine_name": machine_name,
@@ -116,7 +145,7 @@
         "total_time": total_time,
         "training": training,
"batch_size": batch_size, - "output": output_file, + "output": "", "arch": "", "chips": "", # "dataformat": dataformat, @@ -130,6 +155,30 @@ def test_mnist_inference( "evaluation_score": "", } + return result + + +def test_mnist_linear_benchmark(config: dict): + + training = config['training'] + batch_size = config['batch_size'] + output_file = config['output'] + + input_size = MNIST_INPUT_FEATURE_SIZE if config['input_size'] is None else config['input_size'] + hidden_size = MNIIST_HIDDEN_SIZE if config['hidden_size'] is None else config['hidden_size'] + + result = test_mnist_linear( + training=training, + batch_size=batch_size, + input_size=input_size, + hidden_size=hidden_size, + ) + + if not output_file: + output_file = f"forge-benchmark-e2e-mnist_{batch_size}_{input_size}_{hidden_size}.json" + + result["output"] = output_file + # Save the results to a file with open(output_file, "w") as f: json.dump(result, f) diff --git a/forge/test/benchmark/scripts/run_benchmark_wh b/forge/test/benchmark/scripts/run_benchmark_wh new file mode 100644 index 000000000..94588af57 --- /dev/null +++ b/forge/test/benchmark/scripts/run_benchmark_wh @@ -0,0 +1,7 @@ +# ------------------------------------------------------------------------------------------------------------------------------------------------------------ # +# Models we run on Wormhole B0 +# ------------------------------------------------------------------------------------------------------------------------------------------------------------ # + + +# MNIST Linear +python forge/test/benchmark/benchmark.py -m mnist_linear -bs 1 -o forge-benchmark-e2e-mnist.json \ No newline at end of file