diff --git a/models/experimental/functional_yolov11/README.md b/models/experimental/functional_yolov11/README.md
new file mode 100644
index 00000000000..1f95c0c012d
--- /dev/null
+++ b/models/experimental/functional_yolov11/README.md
@@ -0,0 +1,36 @@
+## YOLOv11n - Model
+
+#### Introduction
+
+**YOLOv11** is the latest iteration in the YOLO series, bringing cutting-edge improvements in accuracy, speed, and efficiency for real-time object detection. Building on the success of previous versions, YOLOv11 introduces enhanced architecture and optimized training methods, making it a versatile solution for a wide range of computer vision tasks, from object detection to image classification and pose estimation.
+
+#### Model Details
+
+* The entry point to the YOLOv11 model is located in:
+`models/experimental/functional_yolov11/tt/ttnn_yolov11.py`
+
+* The model picks up weights from the **yolo11n.pt** file located in:
+`models/experimental/functional_yolov11/reference/yolo11n.pt`
+
+#### Batch Size:
+* Set to 1 by default.
+* Batch size determines the number of input images processed simultaneously during training or inference, impacting computational efficiency and memory usage.
+* It's recommended to keep the batch size at **1** for optimal performance.
+
+#### Running YOLOv11 Demo
+* To run the YOLOv11 demo for different resolutions (**224x224** and **640x640**), use the following command:
+`pytest --disable-warnings models/experimental/functional_yolov11/demo/demo.py`
+
+#### Input Data
+* By default, the demo will receive inputs from the `models/experimental/functional_yolov11/demo/images` directory. To test the model on different input data, simply add new image files to this directory.
+
+#### Output Data
+
+* The output from the model will be saved in a **runs** folder created inside:
+`models/experimental/functional_yolov11/demo/`
+* For reference:
+  * The Torch model output will be stored in the **torch_model** directory.
+  * The TTNN model output will be stored in the **tt_model** directory.
+
+#### Pending Issues:
+* [#17835](https://github.com/tenstorrent/tt-metal/issues/17835) - Tracing fails in YOLOv11n model
diff --git a/models/experimental/functional_yolov11/demo/demo.py b/models/experimental/functional_yolov11/demo/demo.py
index bc9dd3eaf4b..2da75fa9978 100644
--- a/models/experimental/functional_yolov11/demo/demo.py
+++ b/models/experimental/functional_yolov11/demo/demo.py
@@ -2,6 +2,7 @@
 
 # SPDX-License-Identifier: Apache-2.0
 
+from pathlib import Path
 import os
 import cv2
 import sys
@@ -11,17 +12,16 @@
 import torch.nn as nn
 from loguru import logger
 from datetime import datetime
-from functools import partial
 from models.utility_functions import disable_persistent_kernel_cache
 from models.experimental.functional_yolov11.reference import yolov11
-
+from models.experimental.functional_yolov11.reference.yolov11 import attempt_load
 from models.experimental.functional_yolov11.tt import ttnn_yolov11
-
 from models.experimental.functional_yolov11.tt.model_preprocessing import (
     create_yolov11_input_tensors,
     create_yolov11_model_parameters,
 )
 from models.experimental.functional_yolov11.demo.demo_utils import LoadImages, preprocess, postprocess
+from models.utility_functions import skip_for_grayskull
 
 try:
     sys.modules["ultralytics"] = yolov11
@@ -33,38 +33,6 @@
     print("models.experimental.functional_yolov11.reference.yolov11 not found.")
 
 
-class Ensemble(nn.ModuleList):
-    def __init__(self):
-        super(Ensemble, self).__init__()
-
-    def forward(self, x, augment=False):
-        y = []
-        for module in self:
-            y.append(module(x, augment)[0])
-        y = torch.cat(y, 1)
-        return y, None
-
-
-def attempt_load(weights, map_location=None):
-    model = Ensemble()
-    for w in weights if isinstance(weights, list) else [weights]:
-        w = "models/experimental/functional_yolov11/reference/yolo11n.pt"
-        ckpt = torch.load(w, map_location=map_location)
-        model.append(ckpt["ema" if ckpt.get("ema") else "model"].float().eval())
-    for m in model.modules():
-        if type(m) in [nn.Hardswish, nn.LeakyReLU, nn.ReLU, nn.ReLU6, nn.SiLU]:
-            m.inplace = True
-        elif type(m) is nn.Upsample:
-            m.recompute_scale_factor = None
-
-    if len(model) == 1:
-        return model[-1]
-    else:
-        for k in ["names", "stride"]:
-            setattr(model, k, getattr(model[-1], k))
-        return model
-
-
 def save_yolo_predictions_by_model(result, save_dir, image_path, model_name):
     model_save_dir = os.path.join(save_dir, model_name)
     os.makedirs(model_save_dir, exist_ok=True)
@@ -99,53 +67,61 @@ def save_yolo_predictions_by_model(result, save_dir, image_path, model_name):
     print(f"Predictions saved to {output_path}")
 
 
+@skip_for_grayskull()
+@pytest.mark.parametrize(
+    "use_pretrained_weight",
+    [
+        False,
+        # True      # uncomment  to run the model for real weights
+    ],
+    ids=[
+        "pretrained_weight_false",
+        # "pretrained_weight_true",    # uncomment to run the model for real weights
+    ],
+)
 @pytest.mark.parametrize("device_params", [{"l1_small_size": 32768}], indirect=True)
 @pytest.mark.parametrize(
-    "source, model_type",
+    "source, model_type,resolution",
     [
-        ("models/experimental/functional_yolov11/demo/images/cycle_girl.jpg", "torch_model"),
-        ("models/experimental/functional_yolov11/demo/images/cycle_girl.jpg", "tt_model"),
-        ("models/experimental/functional_yolov11/demo/images/dog.jpg", "torch_model"),
-        ("models/experimental/functional_yolov11/demo/images/dog.jpg", "tt_model"),
+        # 224*224
+        # ("models/experimental/functional_yolov11/demo/images/cycle_girl.jpg", "torch_model", [3, 224, 224]),
+        # ("models/experimental/functional_yolov11/demo/images/cycle_girl.jpg", "tt_model", [3, 224, 224]),
+        # ("models/experimental/functional_yolov11/demo/images/dog.jpg", "torch_model", [3, 224, 224]),
+        # ("models/experimental/functional_yolov11/demo/images/dog.jpg", "tt_model", [3, 224, 224]),
+        # 640*640
+        # ("models/experimental/functional_yolov11/demo/images/cycle_girl.jpg", "torch_model", [3, 640, 640]),
+        ("models/experimental/functional_yolov11/demo/images/cycle_girl.jpg", "tt_model", [3, 640, 640]),
+        # ("models/experimental/functional_yolov11/demo/images/dog.jpg", "torch_model", [3, 640, 640]),
+        # ("models/experimental/functional_yolov11/demo/images/dog.jpg", "tt_model", [3, 640, 640]),
     ],
 )
-def test_demo(device, source, model_type):
+def test_demo(device, source, model_type, resolution, use_pretrained_weight):
     disable_persistent_kernel_cache()
-
-    if model_type == "torch_model":
-        model = attempt_load("models/experimental/functional_yolov11/reference/yolo11n.pt", map_location="cpu")
-        state_dict = model.state_dict()
-        model = yolov11.YoloV11()
+    model = yolov11.YoloV11()
+    if use_pretrained_weight:
+        logger.info(f"Demo Inferencing with Pre-trained Weights")
+        state_dict = attempt_load("yolo11n.pt", map_location="cpu").state_dict()
         ds_state_dict = {k: v for k, v in state_dict.items()}
         new_state_dict = {}
         for (name1, parameter1), (name2, parameter2) in zip(model.state_dict().items(), ds_state_dict.items()):
             if isinstance(parameter2, torch.FloatTensor):
                 new_state_dict[name1] = parameter2
         model.load_state_dict(new_state_dict)
+    else:
+        logger.info(f"Demo Inferencing with Random Weights")
+    if model_type == "torch_model":
         model.eval()
         logger.info("Inferencing using Torch Model")
     else:
         torch_input, ttnn_input = create_yolov11_input_tensors(
-            device, input_channels=3, input_height=224, input_width=224
+            device, input_channels=resolution[0], input_height=resolution[1], input_width=resolution[2]
         )
-        torch_model = attempt_load("models/experimental/functional_yolov11/reference/yolo11n.pt", map_location="cpu")
-        state_dict = torch_model.state_dict()
-        torch_model = yolov11.YoloV11()
-        ds_state_dict = {k: v for k, v in state_dict.items()}
-        new_state_dict = {}
-        for (name1, parameter1), (name2, parameter2) in zip(torch_model.state_dict().items(), ds_state_dict.items()):
-            if isinstance(parameter2, torch.FloatTensor):
-                new_state_dict[name1] = parameter2
-        torch_model.load_state_dict(new_state_dict)
-        torch_model.eval()
-        parameters = create_yolov11_model_parameters(torch_model, torch_input, device=device)
+        parameters = create_yolov11_model_parameters(model, torch_input, device=device)
         model = ttnn_yolov11.YoloV11(device, parameters)
         logger.info("Inferencing using ttnn Model")
 
     save_dir = "models/experimental/functional_yolov11/demo/runs"
-
     dataset = LoadImages(path=source)
-
     model_save_dir = os.path.join(save_dir, model_type)
     os.makedirs(model_save_dir, exist_ok=True)
 
@@ -234,10 +210,9 @@ def test_demo(device, source, model_type):
 
     for batch in dataset:
         paths, im0s, s = batch
-        im = preprocess(im0s)
+        im = preprocess(im0s, resolution)
         if model_type == "torch_model":
             preds = model(im)
-            print("preds in torch", preds.shape)
         else:
             img = torch.permute(im, (0, 2, 3, 1))
             img = img.reshape(
@@ -247,16 +222,8 @@ def test_demo(device, source, model_type):
                 img.shape[3],
             )
             ttnn_im = ttnn.from_torch(img, layout=ttnn.TILE_LAYOUT, dtype=ttnn.bfloat8_b)
-            # ttnn_im = ttnn.from_torch(img, layout=ttnn.ROW_MAJOR_LAYOUT, dtype=ttnn.bfloat16)
-            # print("input tensor in demo",ttnn_im.shape,ttnn_im.dtype,ttnn_im.memory_config(),ttnn_im.layout)
             preds = model(x=ttnn_im)
             preds = ttnn.to_torch(preds, dtype=torch.float32)
-            print("preds in ttnn", preds.shape)
-
         results = postprocess(preds, im, im0s, batch, names)[0]
-
         save_yolo_predictions_by_model(results, save_dir, source, model_type)
-    # input tensor in demo Shape([1, 1, 409600, 3]) DataType.BFLOAT8_B MemoryConfig(memory_layout=TensorMemoryLayout::INTERLEAVED,buffer_type=BufferType::DRAM,shard_spec=std::nullopt) Layout.TILE
-
-    # input tensor in demo Shape([1, 1, 409600, 3]) DataType.BFLOAT16 MemoryConfig(memory_layout=TensorMemoryLayout::INTERLEAVED,buffer_type=BufferType::DRAM,shard_spec=std::nullopt) Layout.ROW_MAJOR
     print("Inference done")
diff --git a/models/experimental/functional_yolov11/demo/demo_utils.py b/models/experimental/functional_yolov11/demo/demo_utils.py
index 5701b1ada36..d603baae08f 100644
--- a/models/experimental/functional_yolov11/demo/demo_utils.py
+++ b/models/experimental/functional_yolov11/demo/demo_utils.py
@@ -114,15 +114,19 @@ def LetterBox(img, new_shape=(224, 224), auto=False, scaleFill=False, scaleup=Tr
     return img
 
 
-def pre_transform(im):
-    return [LetterBox(img=x) for x in im]
+def pre_transform(im, LetterBox_shape=(224, 224)):
+    return [LetterBox(img=x, new_shape=LetterBox_shape) for x in im]
 
 
-def preprocess(im):
+def preprocess(im, resolution):
     device = "cpu"
     not_tensor = not isinstance(im, torch.Tensor)
     if not_tensor:
-        im = np.stack(pre_transform(im))
+        if resolution[1] == 224:
+            LetterBox_shape = (224, 224)
+        else:
+            LetterBox_shape = (640, 640)
+        im = np.stack(pre_transform(im, LetterBox_shape))
         im = im[..., ::-1].transpose((0, 3, 1, 2))
         im = np.ascontiguousarray(im)
         im = torch.from_numpy(im)
diff --git a/models/experimental/functional_yolov11/demo/runs/torch_model/prediction_torch_224_1.jpg b/models/experimental/functional_yolov11/demo/runs/torch_model/prediction_torch_224_1.jpg
new file mode 100644
index 00000000000..c0fb3dec911
Binary files /dev/null and b/models/experimental/functional_yolov11/demo/runs/torch_model/prediction_torch_224_1.jpg differ
diff --git a/models/experimental/functional_yolov11/demo/runs/torch_model/prediction_torch_640_1.jpg b/models/experimental/functional_yolov11/demo/runs/torch_model/prediction_torch_640_1.jpg
new file mode 100644
index 00000000000..03d33858ddf
Binary files /dev/null and b/models/experimental/functional_yolov11/demo/runs/torch_model/prediction_torch_640_1.jpg differ
diff --git a/models/experimental/functional_yolov11/demo/runs/tt_model/prediction_ttnn_224_1.jpg b/models/experimental/functional_yolov11/demo/runs/tt_model/prediction_ttnn_224_1.jpg
new file mode 100644
index 00000000000..aeacb0426be
Binary files /dev/null and b/models/experimental/functional_yolov11/demo/runs/tt_model/prediction_ttnn_224_1.jpg differ
diff --git a/models/experimental/functional_yolov11/demo/runs/tt_model/prediction_ttnn_640_1.jpg b/models/experimental/functional_yolov11/demo/runs/tt_model/prediction_ttnn_640_1.jpg
new file mode 100644
index 00000000000..e4a4a4481b3
Binary files /dev/null and b/models/experimental/functional_yolov11/demo/runs/tt_model/prediction_ttnn_640_1.jpg differ
diff --git a/models/experimental/functional_yolov11/readme.md b/models/experimental/functional_yolov11/readme.md
deleted file mode 100644
index c11208a12eb..00000000000
--- a/models/experimental/functional_yolov11/readme.md
+++ /dev/null
@@ -1,11 +0,0 @@
-# Current status
-
-Facing ```ModuleNotFoundError: No module named 'ultralytics'``` issue while loading the weights file, so implemented with random weights
-- Finished reference implementation.
-- Completed ttnn implementation of Conv, Bottleneck, SPPF, C3K, C3K2
-
-## PCC:
-Conv - 0.99 </br>
-C3K2 with bottleneck - 0.99</br>
-C3K2 with C3k - facing shape mismatch error</br>
-SPPF - Not checked yet</br>
diff --git a/models/experimental/functional_yolov11/reference/yolov11.py b/models/experimental/functional_yolov11/reference/yolov11.py
index 2b34b89a651..3d26a2a2dec 100644
--- a/models/experimental/functional_yolov11/reference/yolov11.py
+++ b/models/experimental/functional_yolov11/reference/yolov11.py
@@ -1,14 +1,15 @@
-# SPDX-FileCopyrightText: © 2023 Tenstorrent Inc.
+# SPDX-FileCopyrightText: © 2025 Tenstorrent Inc.
 
 # SPDX-License-Identifier: Apache-2.0
 
+from pathlib import Path
+import os
 import torch
 import torch.nn as nn
 import torch.nn.functional as f
 import math
-
-# from torchview import draw_graph
 import torch
+import time
 
 
 def make_anchors(feats, strides, grid_cell_offset=0.5):
@@ -204,7 +205,6 @@ def __init__(self, in_channel, out_channel, kernel, stride, padding, dilation, g
         )
 
     def forward(self, x):
-        #
         x1 = self.cv1(x)
         x2 = self.cv2(x)
         x = self.m(x1)
@@ -216,9 +216,6 @@ def forward(self, x):
 class C3k2(nn.Module):
     def __init__(self, in_channel, out_channel, kernel, stride, padding, dilation, groups, is_bk_enabled=False):
         super().__init__()
-        #
-        #     f"c3k2 init is called,{in_channel}, {out_channel}, {kernel},{stride}, {padding}, {dilation}, {groups},{is_bk_enabled}"
-        # )
         self.is_bk_enabled = is_bk_enabled
         if is_bk_enabled:
             self.cv1 = Conv(
@@ -714,31 +711,22 @@ def forward(self, y1, y2, y3):
 
         ya = torch.reshape(ya, (ya.shape[0], int(ya.shape[1] / self.in_channel[24]), self.in_channel[24], ya.shape[2]))
         ya = torch.permute(ya, (0, 2, 1, 3))
-        ya = f.softmax(ya, dim=1)  # torch.Size([1, 16, 4, 1029])
+        ya = f.softmax(ya, dim=1)
         c = self.dfl(ya)
         c1 = torch.reshape(c, (c.shape[0], c.shape[1] * c.shape[2], c.shape[3]))
         c2 = c1
         c1 = c1[:, 0:2, :]
         c2 = c2[:, 2:4, :]
-
         anchor, strides = (y_all.transpose(0, 1) for y_all in make_anchors(y_all, [8, 16, 32], 0.5))
         anchor.unsqueeze(0)
-
         c1 = anchor - c1
         c2 = anchor + c2
-
-        # print(c1.shape, c2.shape)
-
         z1 = c2 - c1
         z2 = c1 + c2
-
         z2 = z2 / 2
-
         z = torch.concat((z2, z1), 1)
         z = z * strides
-        # yb = torch.load("yb.pt")
         yb = torch.sigmoid(yb)
-        # return yb
         out = torch.concat((z, yb), 1)
         return out
 
@@ -890,24 +878,19 @@ def forward(self, x):
         x = self.model[2](x)  # 2
         x = self.model[3](x)  # 3
         x = self.model[4](x)  # 4
-        # torch.save(x, "/home/ubuntu/tt-metal/models/experimental/functional_yolov11/dumps/torch_out.pth")
         x4 = x
         x = self.model[5](x)  # 5
         x = self.model[6](x)  # 6
         x6 = x
         x = self.model[7](x)  # 7
         x = self.model[8](x)  # 8
-
         x = self.model[9](x)  # 9
         x = self.model[10](x)  # 10
-        # torch.save(x,"/home/ubuntu/venkatesh_yolov11/tt-metal/models/experimental/functional_yolov11/dumps/torch_out.pth")
         x10 = x
-        # print("input to upsample1 is ", x.shape)
         x = f.upsample(x, scale_factor=2.0)  # 11
         x = torch.cat((x, x6), 1)  # 12
         x = self.model[13](x)  # 13
         x13 = x
-        # print("input to upsample2 is ", x.shape)
         x = f.upsample(x, scale_factor=2.0)  # 14
         x = torch.cat((x, x4), 1)  # 15
         x = self.model[16](x)  # 16
@@ -941,28 +924,6 @@ def __init__(self, c1, c2, k=1, s=1, d=1, act=True):  # ch_in, ch_out, kernel, s
         super().__init__(c1, c2, k, s, g=math.gcd(c1, c2), d=d, act=act)
 
 
-# class DFL(nn.Module):
-#     """
-#     Integral module of Distribution Focal Loss (DFL).
-
-#     Proposed in Generalized Focal Loss https://ieeexplore.ieee.org/document/9792391
-#     """
-
-#     def __init__(self, c1=16):
-#         """Initialize a convolutional layer with a given number of input channels."""
-#         super().__init__()
-#         self.conv = nn.Conv2d(c1, 1, 1, bias=False).requires_grad_(False)
-#         x = torch.arange(c1, dtype=torch.float)
-#         self.conv.weight.data[:] = nn.Parameter(x.view(1, c1, 1, 1))
-#         self.c1 = c1
-
-#     def forward(self, x):
-#         """Applies a transformer layer on input tensor 'x' and returns a tensor."""
-#         b, _, a = x.shape  # batch, channels, anchors
-#         return self.conv(x.view(b, 4, self.c1, a).transpose(2, 1).softmax(1)).view(b, 4, a)
-#         # return self.conv(x.view(b, self.c1, 4, a).softmax(1)).view(b, 4, a)
-
-
 class BaseModel(nn.Module):
     def forward(self, x, *args, **kwargs):
         if isinstance(x, dict):
@@ -987,14 +948,61 @@ def __init__(self, cfg="yolov8n.yaml", ch=3, nc=None, verbose=True):
         super().__init__()
 
 
-# model = YoloV11()
-# model_graph = draw_graph(
-# model,
-# input_size=(1, 3, 224, 224),
-# dtypes=[torch.float32],
-# expand_nested=True,
-# graph_name="yolov11_ref",
-# depth=10,
-# directory=".",
-# )
-# model_graph.visual_graph.render(format="pdf")
+class Ensemble(nn.ModuleList):
+    def __init__(self):
+        super(Ensemble, self).__init__()
+
+    def forward(self, x, augment=False):
+        y = []
+        for module in self:
+            y.append(module(x, augment)[0])
+        y = torch.cat(y, 1)
+        return y, None
+
+
+def attempt_download(file, repo="ultralytics/assets", key="reference"):
+    tests = Path(__file__).parent.parent / key
+    file_path = tests / Path(str(file).strip().replace("'", "").lower())
+    if not file_path.exists():
+        name = "yolo11n.pt"
+        msg = f"{file_path} missing, try downloading from https://github.com/{repo}/releases/"
+
+        try:
+            url = f"https://github.com/{repo}/releases/download/v8.3.0/{name}"
+
+            print(f"Downloading {url} to {file_path}...")
+            torch.hub.download_url_to_file(url, file_path)
+            assert file_path.exists() and file_path.stat().st_size > 1e6, f"Download failed for {name}"
+
+        except Exception as e:
+            print(f"Error downloading from GitHub: {e}. Trying secondary source...")
+            url = f"https://storage.googleapis.com/{repo}/ckpt/{name}"
+            print(f"Downloading {url} to {file_path}...")
+            os.system(f"curl -L {url} -o {file_path}")
+            if not file_path.exists() or file_path.stat().st_size < 1e6:
+                file_path.unlink(missing_ok=True)
+                print(f"ERROR: Download failure for {msg}")
+            else:
+                print(f"Download succeeded from secondary source!")
+    return file_path
+
+
+def attempt_load(weights, map_location=None):
+    model = Ensemble()
+    for w in weights if isinstance(weights, list) else [weights]:
+        weight_path = attempt_download(w)
+        ckpt = torch.load(weight_path, map_location=map_location)
+        model.append(ckpt["ema" if ckpt.get("ema") else "model"].float().eval())
+
+    for m in model.modules():
+        if isinstance(m, (nn.Hardswish, nn.LeakyReLU, nn.ReLU, nn.ReLU6, nn.SiLU)):
+            m.inplace = True
+        elif isinstance(m, nn.Upsample):
+            m.recompute_scale_factor = None
+
+    if len(model) == 1:
+        return model[-1]
+    else:
+        for k in ["names", "stride"]:
+            setattr(model, k, getattr(model[-1], k))
+        return model
diff --git a/models/experimental/functional_yolov11/test/test_ttnn_attention.py b/models/experimental/functional_yolov11/test/test_ttnn_attention.py
deleted file mode 100644
index ed6d3fae468..00000000000
--- a/models/experimental/functional_yolov11/test/test_ttnn_attention.py
+++ /dev/null
@@ -1,53 +0,0 @@
-# SPDX-FileCopyrightText: © 2025 Tenstorrent Inc.
-# SPDX-License-Identifier: Apache-2.0
-import pytest
-import ttnn
-import torch
-from tests.ttnn.utils_for_testing import assert_with_pcc
-from models.experimental.functional_yolov11.tt.model_preprocessing import (
-    create_yolov11_input_tensors,
-    create_yolov11_model_parameters,
-)
-from models.experimental.functional_yolov11.reference.yolov11 import Attention as torch_attention
-from models.experimental.functional_yolov11.tt.ttnn_yolov11 import Attention as ttnn_attention
-
-
-@pytest.mark.parametrize(
-    "in_channel, out_channel, kernel, stride, padding, dilation, groups,fwd_input_shape",
-    [
-        ([128, 128, 128], [256, 128, 128], [1, 1, 3], [1, 1, 1], [0, 0, 1], [1, 1, 1], [1, 1, 128], [1, 128, 7, 7]),
-    ],
-)
-@pytest.mark.parametrize("device_params", [{"l1_small_size": 79104}], indirect=True)
-def test_yolo_v11_attention(
-    device,
-    use_program_cache,
-    reset_seeds,
-    in_channel,
-    out_channel,
-    kernel,
-    stride,
-    padding,
-    dilation,
-    groups,
-    fwd_input_shape,
-):
-    torch_module = torch_attention(in_channel, out_channel, kernel, stride, padding, dilation, groups)
-    torch_module.eval()
-    torch_input, ttnn_input = create_yolov11_input_tensors(
-        device,
-        batch=fwd_input_shape[0],
-        input_channels=fwd_input_shape[1],
-        input_height=fwd_input_shape[2],
-        input_width=fwd_input_shape[3],
-    )
-    ttnn_input = ttnn.to_device(ttnn_input, device=device)
-    ttnn_input = ttnn.to_layout(ttnn_input, layout=ttnn.TILE_LAYOUT, memory_config=ttnn.L1_MEMORY_CONFIG)
-    torch_output = torch_module(torch_input)
-    parameters = create_yolov11_model_parameters(torch_module, torch_input, device=device)
-    ttnn_module = ttnn_attention(device=device, parameter=parameters.conv_args, conv_pt=parameters)
-    ttnn_output = ttnn_module(x=ttnn_input, device=device)
-    ttnn_output = ttnn.to_torch(ttnn_output)
-    ttnn_output = ttnn_output.permute(0, 3, 1, 2)
-    ttnn_output = ttnn_output.reshape(torch_output.shape)
-    assert_with_pcc(torch_output, ttnn_output, 0.99999)
diff --git a/models/experimental/functional_yolov11/test/test_ttnn_bottleneck.py b/models/experimental/functional_yolov11/test/test_ttnn_bottleneck.py
deleted file mode 100644
index 3aee44b8e84..00000000000
--- a/models/experimental/functional_yolov11/test/test_ttnn_bottleneck.py
+++ /dev/null
@@ -1,59 +0,0 @@
-# SPDX-FileCopyrightText: © 2025 Tenstorrent Inc.
-# SPDX-License-Identifier: Apache-2.0
-import pytest
-import ttnn
-from tests.ttnn.utils_for_testing import assert_with_pcc
-from models.experimental.functional_yolov11.tt.model_preprocessing import (
-    create_yolov11_input_tensors,
-    create_yolov11_model_parameters,
-)
-from models.experimental.functional_yolov11.reference.yolov11 import Bottleneck as torch_bottleneck
-from models.experimental.functional_yolov11.tt.ttnn_yolov11 import Bottleneck as ttnn_bottleneck
-
-
-@pytest.mark.parametrize(
-    "in_channel, out_channel, kernel, stride, padding, dilation, groups,fwd_input_shape",
-    [
-        ([16, 8], [8, 16], [3, 3], [1, 1], [1, 1], [1, 1], [1, 1], [1, 16, 56, 56]),  # 1
-        ([32, 16], [16, 32], [3, 3], [1, 1], [1, 1], [1, 1], [1, 1], [1, 32, 38, 28]),  # 2
-        ([32, 32], [32, 32], [3, 3], [1, 1], [1, 1], [1, 1], [1, 1], [1, 32, 14, 14]),  # 3
-        ([64, 64], [64, 64], [3, 3], [1, 1], [1, 1], [1, 1], [1, 1], [1, 64, 7, 7]),
-        ([64, 32], [32, 64], [3, 3], [1, 1], [1, 1], [1, 1], [1, 1], [1, 64, 14, 14]),
-        ([32, 16], [16, 32], [3, 3], [1, 1], [1, 1], [1, 1], [1, 1], [1, 32, 28, 28]),
-        ([64, 32], [32, 64], [3, 3], [1, 1], [1, 1], [1, 1], [1, 1], [1, 64, 14, 14]),
-        ([64, 64], [64, 64], [3, 3], [1, 1], [1, 1], [1, 1], [1, 1], [1, 64, 7, 7]),
-    ],
-)
-@pytest.mark.parametrize("device_params", [{"l1_small_size": 79104}], indirect=True)
-def test_yolo_v11_bottleneck(
-    device,
-    use_program_cache,
-    reset_seeds,
-    in_channel,
-    out_channel,
-    kernel,
-    stride,
-    padding,
-    dilation,
-    groups,
-    fwd_input_shape,
-):
-    torch_module = torch_bottleneck(in_channel, out_channel, kernel, stride, padding, dilation, groups)
-    torch_module.eval()
-    torch_input, ttnn_input = create_yolov11_input_tensors(
-        device,
-        batch=fwd_input_shape[0],
-        input_channels=fwd_input_shape[1],
-        input_height=fwd_input_shape[2],
-        input_width=fwd_input_shape[3],
-    )
-    ttnn_input = ttnn.to_device(ttnn_input, device=device)
-    ttnn_input = ttnn.to_layout(ttnn_input, layout=ttnn.TILE_LAYOUT, memory_config=ttnn.L1_MEMORY_CONFIG)
-    torch_output = torch_module(torch_input)
-    parameters = create_yolov11_model_parameters(torch_module, torch_input, device=device)
-    ttnn_module = ttnn_bottleneck(device=device, parameter=parameters.conv_args, conv_pt=parameters)
-    ttnn_output = ttnn_module(x=ttnn_input, device=device)
-    ttnn_output = ttnn.to_torch(ttnn_output)
-    ttnn_output = ttnn_output.permute(0, 3, 1, 2)
-    ttnn_output = ttnn_output.reshape(torch_output.shape)
-    assert_with_pcc(torch_output, ttnn_output, 0.99999)
diff --git a/models/experimental/functional_yolov11/test/test_ttnn_c2psa.py b/models/experimental/functional_yolov11/test/test_ttnn_c2psa.py
deleted file mode 100644
index 25b6bbe79f5..00000000000
--- a/models/experimental/functional_yolov11/test/test_ttnn_c2psa.py
+++ /dev/null
@@ -1,62 +0,0 @@
-# SPDX-FileCopyrightText: © 2025 Tenstorrent Inc.
-# SPDX-License-Identifier: Apache-2.0
-import pytest
-import ttnn
-import torch
-from tests.ttnn.utils_for_testing import assert_with_pcc
-from models.experimental.functional_yolov11.tt.model_preprocessing import (
-    create_yolov11_input_tensors,
-    create_yolov11_model_parameters,
-)
-from models.experimental.functional_yolov11.reference.yolov11 import C2PSA as torch_c2psa_block
-from models.experimental.functional_yolov11.tt.ttnn_yolov11 import C2PSA as ttnn_c2psa_block
-
-
-@pytest.mark.parametrize(
-    "in_channel, out_channel, kernel, stride, padding, dilation, groups,fwd_input_shape",
-    [
-        (
-            [256, 256, 128, 128, 128, 128, 256],
-            [256, 256, 256, 128, 128, 256, 128],
-            [1, 1, 1, 1, 3, 1, 1],
-            [1, 1, 1, 1, 1, 1, 1],
-            [0, 0, 0, 0, 1, 0, 0],
-            [1, 1, 1, 1, 1, 1, 1],
-            [1, 1, 1, 1, 128, 1, 1],
-            [1, 256, 7, 7],
-        ),
-    ],
-)
-@pytest.mark.parametrize("device_params", [{"l1_small_size": 79104}], indirect=True)
-def test_yolo_v11_c2psa_block(
-    device,
-    use_program_cache,
-    reset_seeds,
-    in_channel,
-    out_channel,
-    kernel,
-    stride,
-    padding,
-    dilation,
-    groups,
-    fwd_input_shape,
-):
-    torch_module = torch_c2psa_block(in_channel, out_channel, kernel, stride, padding, dilation, groups)
-    torch_module.eval()
-    torch_input, ttnn_input = create_yolov11_input_tensors(
-        device,
-        batch=fwd_input_shape[0],
-        input_channels=fwd_input_shape[1],
-        input_height=fwd_input_shape[2],
-        input_width=fwd_input_shape[3],
-    )
-    ttnn_input = ttnn.to_device(ttnn_input, device=device)
-    ttnn_input = ttnn.to_layout(ttnn_input, layout=ttnn.TILE_LAYOUT, memory_config=ttnn.L1_MEMORY_CONFIG)
-    torch_output = torch_module(torch_input)
-    parameters = create_yolov11_model_parameters(torch_module, torch_input, device=device)
-    ttnn_module = ttnn_c2psa_block(device=device, parameter=parameters.conv_args, conv_pt=parameters)
-    ttnn_output = ttnn_module(x=ttnn_input, device=device)
-    ttnn_output = ttnn.to_torch(ttnn_output)
-    ttnn_output = ttnn_output.permute(0, 3, 1, 2)
-    ttnn_output = ttnn_output.reshape(torch_output.shape)
-    assert_with_pcc(torch_output, ttnn_output, 0.99999)
diff --git a/models/experimental/functional_yolov11/test/test_ttnn_c3k.py b/models/experimental/functional_yolov11/test/test_ttnn_c3k.py
deleted file mode 100644
index 13aade02c0e..00000000000
--- a/models/experimental/functional_yolov11/test/test_ttnn_c3k.py
+++ /dev/null
@@ -1,93 +0,0 @@
-# SPDX-FileCopyrightText: © 2025 Tenstorrent Inc.
-# SPDX-License-Identifier: Apache-2.0
-
-import pytest
-import ttnn
-import torch
-from tests.ttnn.utils_for_testing import assert_with_pcc
-from models.experimental.functional_yolov11.tt.model_preprocessing import (
-    create_yolov11_input_tensors,
-    create_yolov11_model_parameters,
-)
-from models.experimental.functional_yolov11.reference.yolov11 import C3k as torch_c3k
-from models.experimental.functional_yolov11.tt.ttnn_yolov11 import C3K as ttnn_c3k
-
-
-@pytest.mark.parametrize(
-    "in_channel, out_channel, kernel, stride, padding, dilation, groups,fwd_input_shape",
-    [
-        (
-            [64, 64, 64, 32, 32, 32, 32],
-            [32, 32, 64, 32, 32, 32, 32],
-            [1, 1, 1, 3, 3, 3, 3],
-            [1, 1, 1, 1, 1, 1, 1],
-            [0, 0, 0, 1, 1, 1, 1],
-            [1, 1, 1, 1, 1, 1, 1],
-            [1, 1, 1, 1, 1, 1, 1],
-            [1, 64, 14, 14],
-        ),
-        (
-            [128, 128, 128, 64, 64, 64, 64],
-            [64, 64, 128, 64, 64, 64, 64],
-            [1, 1, 1, 3, 3, 3, 3],
-            [1, 1, 1, 1, 1, 1, 1],
-            [0, 0, 0, 1, 1, 1, 1],
-            [1, 1, 1, 1, 1, 1, 1],
-            [1, 1, 1, 1, 1, 1, 1],
-            [1, 128, 7, 7],
-        ),
-        (
-            [64, 64, 64, 32, 32, 32, 32],
-            [32, 32, 64, 32, 32, 32, 32],
-            [1, 1, 1, 3, 3, 3, 3],
-            [1, 1, 1, 1, 1, 1, 1],
-            [0, 0, 0, 1, 1, 1, 1],
-            [1, 1, 1, 1, 1, 1, 1],
-            [1, 1, 1, 1, 1, 1, 1],
-            [1, 64, 40, 40],
-        ),
-        (
-            [128, 128, 128, 64, 64, 64, 64],
-            [64, 64, 128, 64, 64, 64, 64],
-            [1, 1, 1, 3, 3, 3, 3],
-            [1, 1, 1, 1, 1, 1, 1],
-            [0, 0, 0, 1, 1, 1, 1],
-            [1, 1, 1, 1, 1, 1, 1],
-            [1, 1, 1, 1, 1, 1, 1],
-            [1, 128, 20, 20],
-        ),
-    ],
-)
-@pytest.mark.parametrize("device_params", [{"l1_small_size": 79104}], indirect=True)
-def test_yolo_v11_c3k(
-    device,
-    use_program_cache,
-    reset_seeds,
-    in_channel,
-    out_channel,
-    kernel,
-    stride,
-    padding,
-    dilation,
-    groups,
-    fwd_input_shape,
-):
-    torch_module = torch_c3k(in_channel, out_channel, kernel, stride, padding, dilation, groups)
-    torch_module.eval()
-    torch_input, ttnn_input = create_yolov11_input_tensors(
-        device,
-        batch=fwd_input_shape[0],
-        input_channels=fwd_input_shape[1],
-        input_height=fwd_input_shape[2],
-        input_width=fwd_input_shape[3],
-    )
-    ttnn_input = ttnn.to_device(ttnn_input, device=device)
-    ttnn_input = ttnn.to_layout(ttnn_input, layout=ttnn.TILE_LAYOUT, memory_config=ttnn.L1_MEMORY_CONFIG)
-    torch_output = torch_module(torch_input)
-    parameters = create_yolov11_model_parameters(torch_module, torch_input, device=device)
-    ttnn_module = ttnn_c3k(device=device, parameter=parameters.conv_args, conv_pt=parameters)
-    ttnn_output = ttnn_module(x=ttnn_input, device=device)  # ttnn.Shape([1, 1, 224, 64])
-    ttnn_output = ttnn.to_torch(ttnn_output)
-    ttnn_output = ttnn_output.permute(0, 3, 1, 2)
-    ttnn_output = ttnn_output.reshape(torch_output.shape)
-    assert_with_pcc(torch_output, ttnn_output, 0.99999)
diff --git a/models/experimental/functional_yolov11/test/test_ttnn_c3k2.py b/models/experimental/functional_yolov11/test/test_ttnn_c3k2.py
deleted file mode 100644
index 0c75d152c17..00000000000
--- a/models/experimental/functional_yolov11/test/test_ttnn_c3k2.py
+++ /dev/null
@@ -1,234 +0,0 @@
-# SPDX-FileCopyrightText: © 2025 Tenstorrent Inc.
-# SPDX-License-Identifier: Apache-2.0
-
-import pytest
-import ttnn
-import torch
-from tests.ttnn.utils_for_testing import assert_with_pcc
-from models.experimental.functional_yolov11.tt.model_preprocessing import (
-    create_yolov11_input_tensors,
-    create_yolov11_model_parameters,
-)
-from models.experimental.functional_yolov11.reference.yolov11 import C3k2 as torch_c3k2
-from models.experimental.functional_yolov11.tt.ttnn_yolov11 import C3k2 as ttnn_c3k2
-
-
-@pytest.mark.parametrize(
-    "in_channel, out_channel, kernel, stride, padding, dilation, groups,is_bk_enabled,fwd_input_shape",
-    [
-        # 224
-        # (
-        #     [32, 48, 16, 8],
-        #     [32, 64, 8, 16],
-        #     [1, 1, 3, 3],
-        #     [1, 1, 1, 1],
-        #     [0, 0, 1, 1],
-        #     [1, 1, 1, 1],
-        #     [1, 1, 1, 1],
-        #     True,
-        #     [1, 32, 56, 56],
-        # ),
-        # (
-        #     [64, 96, 32, 16],
-        #     [64, 128, 16, 32],
-        #     [1, 1, 3, 3],
-        #     [1, 1, 1, 1],
-        #     [0, 0, 1, 1],
-        #     [1, 1, 1, 1],
-        #     [1, 1, 1, 1],
-        #     True,
-        #     [1, 64, 28, 28],
-        # ),
-        # (
-        #     [128, 192, 64, 64, 64, 32, 32, 32, 32],
-        #     [128, 128, 32, 32, 64, 32, 32, 32, 32],
-        #     [1, 1, 1, 1, 1, 3, 3, 3, 3],
-        #     [1, 1, 1, 1, 1, 1, 1, 1, 1],
-        #     [0, 0, 0, 0, 0, 1, 1, 1, 1],
-        #     [1, 1, 1, 1, 1, 1, 1, 1, 1],
-        #     [1, 1, 1, 1, 1, 1, 1, 1, 1],
-        #     False,
-        #     [1, 128, 14, 14],
-        # ),
-        # (
-        #     [256, 384, 128, 128, 128, 64, 64, 64, 64],
-        #     [256, 256, 64, 64, 128, 64, 64, 64, 64],
-        #     [1, 1, 1, 1, 1, 3, 3, 3, 3],
-        #     [1, 1, 1, 1, 1, 1, 1, 1, 1],
-        #     [0, 0, 0, 0, 0, 1, 1, 1, 1],
-        #     [1, 1, 1, 1, 1, 1, 1, 1, 1],
-        #     [1, 1, 1, 1, 1, 1, 1, 1, 1],
-        #     False,
-        #     [1, 256, 7, 7],
-        # ),
-        # (
-        #     [384, 192, 64, 32],
-        #     [128, 128, 32, 64],
-        #     [1, 1, 3, 3],
-        #     [1, 1, 1, 1],
-        #     [0, 0, 1, 1],
-        #     [1, 1, 1, 1],
-        #     [1, 1, 1, 1],
-        #     True,
-        #     [1, 384, 14, 14],
-        # ),
-        # (
-        #     [256, 96, 32, 16],
-        #     [64, 64, 16, 32],
-        #     [1, 1, 3, 3],
-        #     [1, 1, 1, 1],
-        #     [0, 0, 1, 1],
-        #     [1, 1, 1, 1],
-        #     [1, 1, 1, 1],
-        #     True,
-        #     [1, 256, 28, 28],
-        # ),
-        # (
-        #     [192, 192, 64, 32],
-        #     [128, 128, 32, 64],
-        #     [1, 1, 3, 3],
-        #     [1, 1, 1, 1],
-        #     [0, 0, 1, 1],
-        #     [1, 1, 1, 1],
-        #     [1, 1, 1, 1],
-        #     True,
-        #     [1, 192, 14, 14],
-        # ),
-        # (
-        #     [384, 384, 128, 128, 128, 64, 64, 64, 64],
-        #     [256, 256, 64, 64, 128, 64, 64, 64, 64],
-        #     [1, 1, 1, 1, 1, 3, 3, 3, 3],
-        #     [1, 1, 1, 1, 1, 1, 1, 1, 1],
-        #     [0, 0, 0, 0, 0, 1, 1, 1, 1],
-        #     [1, 1, 1, 1, 1, 1, 1, 1, 1],
-        #     [1, 1, 1, 1, 1, 1, 1, 1, 1],
-        #     False,
-        #     [1, 384, 7, 7],
-        # ),
-        # #640
-        (
-            [32, 48, 16, 8],
-            [32, 64, 8, 16],
-            [1, 1, 3, 3],
-            [1, 1, 1, 1],
-            [0, 0, 1, 1],
-            [1, 1, 1, 1],
-            [1, 1, 1, 1],
-            True,
-            [1, 32, 160, 160],
-        ),
-        (
-            [64, 96, 32, 16],
-            [64, 128, 16, 32],
-            [1, 1, 3, 3],
-            [1, 1, 1, 1],
-            [0, 0, 1, 1],
-            [1, 1, 1, 1],
-            [1, 1, 1, 1],
-            True,
-            [1, 64, 80, 80],
-        ),
-        (
-            [128, 192, 64, 64, 64, 32, 32, 32, 32],
-            [128, 128, 32, 32, 64, 32, 32, 32, 32],
-            [1, 1, 1, 1, 1, 3, 3, 3, 3],
-            [1, 1, 1, 1, 1, 1, 1, 1, 1],
-            [0, 0, 0, 0, 0, 1, 1, 1, 1],
-            [1, 1, 1, 1, 1, 1, 1, 1, 1],
-            [1, 1, 1, 1, 1, 1, 1, 1, 1],
-            False,
-            [1, 128, 40, 40],
-        ),
-        (
-            [256, 384, 128, 128, 128, 64, 64, 64, 64],
-            [256, 256, 64, 64, 128, 64, 64, 64, 64],
-            [1, 1, 1, 1, 1, 3, 3, 3, 3],
-            [1, 1, 1, 1, 1, 1, 1, 1, 1],
-            [0, 0, 0, 0, 0, 1, 1, 1, 1],
-            [1, 1, 1, 1, 1, 1, 1, 1, 1],
-            [1, 1, 1, 1, 1, 1, 1, 1, 1],
-            False,
-            [1, 256, 20, 20],
-        ),
-        (
-            [384, 192, 64, 32],
-            [128, 128, 32, 64],
-            [1, 1, 3, 3],
-            [1, 1, 1, 1],
-            [0, 0, 1, 1],
-            [1, 1, 1, 1],
-            [1, 1, 1, 1],
-            True,
-            [1, 384, 40, 40],
-        ),
-        (
-            [256, 96, 32, 16],
-            [64, 64, 16, 32],
-            [1, 1, 3, 3],
-            [1, 1, 1, 1],
-            [0, 0, 1, 1],
-            [1, 1, 1, 1],
-            [1, 1, 1, 1],
-            True,
-            [1, 256, 80, 80],
-        ),
-        (
-            [192, 192, 64, 32],
-            [128, 128, 32, 64],
-            [1, 1, 3, 3],
-            [1, 1, 1, 1],
-            [0, 0, 1, 1],
-            [1, 1, 1, 1],
-            [1, 1, 1, 1],
-            True,
-            [1, 192, 40, 40],
-        ),
-        (
-            [384, 384, 128, 128, 128, 64, 64, 64, 64],
-            [256, 256, 64, 64, 128, 64, 64, 64, 64],
-            [1, 1, 1, 1, 1, 3, 3, 3, 3],
-            [1, 1, 1, 1, 1, 1, 1, 1, 1],
-            [0, 0, 0, 0, 0, 1, 1, 1, 1],
-            [1, 1, 1, 1, 1, 1, 1, 1, 1],
-            [1, 1, 1, 1, 1, 1, 1, 1, 1],
-            False,
-            [1, 384, 20, 20],
-        ),
-    ],
-)
-@pytest.mark.parametrize("device_params", [{"l1_small_size": 79104}], indirect=True)
-def test_yolo_v11_c3k2(
-    device,
-    use_program_cache,
-    reset_seeds,
-    in_channel,
-    out_channel,
-    kernel,
-    stride,
-    padding,
-    dilation,
-    groups,
-    is_bk_enabled,
-    fwd_input_shape,
-):
-    torch_module = torch_c3k2(in_channel, out_channel, kernel, stride, padding, dilation, groups, is_bk_enabled)
-    torch_module.eval()
-    torch_input, ttnn_input = create_yolov11_input_tensors(
-        device,
-        batch=fwd_input_shape[0],
-        input_channels=fwd_input_shape[1],
-        input_height=fwd_input_shape[2],
-        input_width=fwd_input_shape[3],
-    )
-    ttnn_input = ttnn.to_device(ttnn_input, device=device, memory_config=ttnn.L1_MEMORY_CONFIG)
-    # ttnn_input = ttnn.to_layout(ttnn_input, layout=ttnn.TILE_LAYOUT, )
-    torch_output = torch_module(torch_input)
-    parameters = create_yolov11_model_parameters(torch_module, torch_input, device=device)
-    ttnn_module = ttnn_c3k2(
-        device=device, parameter=parameters.conv_args, conv_pt=parameters, is_bk_enabled=is_bk_enabled
-    )
-    ttnn_output = ttnn_module(x=ttnn_input, device=device)
-    ttnn_output = ttnn.to_torch(ttnn_output)
-    ttnn_output = ttnn_output.permute(0, 3, 1, 2)
-    ttnn_output = ttnn_output.reshape(torch_output.shape)
-    assert_with_pcc(torch_output, ttnn_output, 0.99999)
diff --git a/models/experimental/functional_yolov11/test/test_ttnn_detect.py b/models/experimental/functional_yolov11/test/test_ttnn_detect.py
deleted file mode 100644
index 2108bf23eb2..00000000000
--- a/models/experimental/functional_yolov11/test/test_ttnn_detect.py
+++ /dev/null
@@ -1,86 +0,0 @@
-# SPDX-FileCopyrightText: © 2024 Tenstorrent Inc.
-# SPDX-License-Identifier: Apache-2.0
-import pytest
-import ttnn
-import torch
-from tests.ttnn.utils_for_testing import assert_with_pcc
-from models.experimental.functional_yolov11.tt.model_preprocessing import (
-    create_yolov11_input_tensors,
-    create_yolov11_model_parameters_detect,
-    create_yolov11_model_parameters,
-)
-from models.experimental.functional_yolov11.reference.yolov11 import Detect as torch_detect
-from models.experimental.functional_yolov11.tt.ttnn_yolov11 import Detect as ttnn_detect
-from ttnn.model_preprocessing import preprocess_model_parameters
-import math
-
-
-@pytest.mark.parametrize(
-    "in_channel, out_channel, kernel, stride, padding, dilation, groups,fwd_input_shape",
-    [
-        (
-            [64, 64, 64, 128, 64, 64, 256, 64, 64, 64, 64, 80, 80, 80, 128, 128, 80, 80, 80, 256, 256, 80, 80, 80, 16],
-            [64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 80, 80, 80, 80, 128, 80, 80, 80, 80, 256, 80, 80, 80, 80, 1],
-            [3, 3, 1, 3, 3, 1, 3, 3, 1, 3, 1, 3, 1, 1, 3, 1, 3, 1, 1, 3, 1, 3, 1, 1, 1],
-            [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
-            [1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 0, 1, 0, 0, 1, 0, 1, 0, 0, 1, 0, 1, 0, 0, 0],
-            [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
-            [1, 1, 1, 1, 1, 1, 1, 1, 1, 64, 1, 80, 1, 1, 128, 1, 80, 1, 1, 256, 1, 80, 1, 1, 1],
-            [[1, 64, 28, 28], [1, 128, 14, 14], [1, 256, 7, 7]],
-        ),
-    ],
-)
-@pytest.mark.parametrize("device_params", [{"l1_small_size": 79104}], indirect=True)
-def test_yolo_v11_detect(
-    device,
-    use_program_cache,
-    reset_seeds,
-    in_channel,
-    out_channel,
-    kernel,
-    stride,
-    padding,
-    dilation,
-    groups,
-    fwd_input_shape,
-):
-    torch_module = torch_detect(in_channel, out_channel, kernel, stride, padding, dilation, groups)
-    torch_module.eval()
-    torch_input_1, ttnn_input_1 = create_yolov11_input_tensors(
-        device,
-        batch=fwd_input_shape[0][0],
-        input_channels=fwd_input_shape[0][1],
-        input_height=fwd_input_shape[0][2],
-        input_width=fwd_input_shape[0][3],
-    )
-    torch_input_2, ttnn_input_2 = create_yolov11_input_tensors(
-        device,
-        batch=fwd_input_shape[1][0],
-        input_channels=fwd_input_shape[1][1],
-        input_height=fwd_input_shape[1][2],
-        input_width=fwd_input_shape[1][3],
-    )
-    torch_input_3, ttnn_input_3 = create_yolov11_input_tensors(
-        device,
-        batch=fwd_input_shape[2][0],
-        input_channels=fwd_input_shape[2][1],
-        input_height=fwd_input_shape[2][2],
-        input_width=fwd_input_shape[2][3],
-    )
-    ttnn_input_1 = ttnn.to_device(ttnn_input_1, device=device)
-    ttnn_input_1 = ttnn.to_layout(ttnn_input_1, layout=ttnn.TILE_LAYOUT, memory_config=ttnn.L1_MEMORY_CONFIG)
-    ttnn_input_2 = ttnn.to_device(ttnn_input_2, device=device)
-    ttnn_input_2 = ttnn.to_layout(ttnn_input_2, layout=ttnn.TILE_LAYOUT, memory_config=ttnn.L1_MEMORY_CONFIG)
-    ttnn_input_3 = ttnn.to_device(ttnn_input_3, device=device)
-    ttnn_input_3 = ttnn.to_layout(ttnn_input_3, layout=ttnn.TILE_LAYOUT, memory_config=ttnn.L1_MEMORY_CONFIG)
-    torch_output = torch_module(torch_input_1, torch_input_2, torch_input_3)
-    parameters = create_yolov11_model_parameters_detect(
-        torch_module, torch_input_1, torch_input_2, torch_input_3, device=device
-    )
-    ttnn_module = ttnn_detect(device=device, parameter=parameters.model, conv_pt=parameters)
-
-    ttnn_output = ttnn_module(y1=ttnn_input_1, y2=ttnn_input_2, y3=ttnn_input_3, device=device)
-    ttnn_output = ttnn.to_torch(ttnn_output)
-    # ttnn_output = ttnn_output.permute(0, 2, 1)
-    ttnn_output = ttnn_output.reshape(torch_output.shape)
-    assert_with_pcc(torch_output, ttnn_output, 0.99999)
diff --git a/models/experimental/functional_yolov11/test/test_ttnn_dtype_issue.py b/models/experimental/functional_yolov11/test/test_ttnn_dtype_issue.py
deleted file mode 100644
index 256d7cee684..00000000000
--- a/models/experimental/functional_yolov11/test/test_ttnn_dtype_issue.py
+++ /dev/null
@@ -1,18 +0,0 @@
-# SPDX-FileCopyrightText: © 2025 Tenstorrent Inc.
-
-# SPDX-License-Identifier: Apache-2.0
-
-
-import pytest
-import ttnn
-import torch
-from tests.ttnn.utils_for_testing import assert_with_pcc
-
-
-@pytest.mark.parametrize("device_params", [{"l1_small_size": 16384}], indirect=True)
-def test_dtype_issue(device):
-    a = torch.randn((1, 256, 1, 49), dtype=torch.bfloat16)
-    a_ttnn = ttnn.from_torch(a, device=device, layout=ttnn.TILE_LAYOUT, dtype=ttnn.bfloat8_b)
-    a_ttnn = ttnn.to_dtype(a_ttnn, ttnn.bfloat16)
-    ttnn_output = ttnn.to_torch(a_ttnn)
-    assert_with_pcc(a, ttnn_output, 0.99999)
diff --git a/models/experimental/functional_yolov11/test/test_ttnn_psa_block.py b/models/experimental/functional_yolov11/test/test_ttnn_psa_block.py
deleted file mode 100644
index c2643318abf..00000000000
--- a/models/experimental/functional_yolov11/test/test_ttnn_psa_block.py
+++ /dev/null
@@ -1,62 +0,0 @@
-# SPDX-FileCopyrightText: © 2025 Tenstorrent Inc.
-# SPDX-License-Identifier: Apache-2.0
-import pytest
-import ttnn
-import torch
-from tests.ttnn.utils_for_testing import assert_with_pcc
-from models.experimental.functional_yolov11.tt.model_preprocessing import (
-    create_yolov11_input_tensors,
-    create_yolov11_model_parameters,
-)
-from models.experimental.functional_yolov11.reference.yolov11 import PSABlock as torch_psa_block
-from models.experimental.functional_yolov11.tt.ttnn_yolov11 import PSABlock as ttnn_psa_block
-
-
-@pytest.mark.parametrize(
-    "in_channel, out_channel, kernel, stride, padding, dilation, groups,fwd_input_shape",
-    [
-        (
-            [128, 128, 128, 128, 256],
-            [256, 128, 128, 256, 128],
-            [1, 1, 3, 1, 1],
-            [1, 1, 1, 1, 1],
-            [0, 0, 1, 0, 0],
-            [1, 1, 1, 1, 1],
-            [1, 1, 128, 1, 1],
-            [1, 128, 7, 7],
-        ),
-    ],
-)
-@pytest.mark.parametrize("device_params", [{"l1_small_size": 79104}], indirect=True)
-def test_yolo_v11_psa_block(
-    device,
-    use_program_cache,
-    reset_seeds,
-    in_channel,
-    out_channel,
-    kernel,
-    stride,
-    padding,
-    dilation,
-    groups,
-    fwd_input_shape,
-):
-    torch_module = torch_psa_block(in_channel, out_channel, kernel, stride, padding, dilation, groups)
-    torch_module.eval()
-    torch_input, ttnn_input = create_yolov11_input_tensors(
-        device,
-        batch=fwd_input_shape[0],
-        input_channels=fwd_input_shape[1],
-        input_height=fwd_input_shape[2],
-        input_width=fwd_input_shape[3],
-    )
-    ttnn_input = ttnn.to_device(ttnn_input, device=device)
-    ttnn_input = ttnn.to_layout(ttnn_input, layout=ttnn.TILE_LAYOUT, memory_config=ttnn.L1_MEMORY_CONFIG)
-    torch_output = torch_module(torch_input)
-    parameters = create_yolov11_model_parameters(torch_module, torch_input, device=device)
-    ttnn_module = ttnn_psa_block(device=device, parameter=parameters.conv_args, conv_pt=parameters)
-    ttnn_output = ttnn_module(x=ttnn_input, device=device)
-    ttnn_output = ttnn.to_torch(ttnn_output)
-    ttnn_output = ttnn_output.permute(0, 3, 1, 2)
-    ttnn_output = ttnn_output.reshape(torch_output.shape)
-    assert_with_pcc(torch_output, ttnn_output, 0.99999)
diff --git a/models/experimental/functional_yolov11/test/test_ttnn_sppf.py b/models/experimental/functional_yolov11/test/test_ttnn_sppf.py
deleted file mode 100644
index 8e8f6310df0..00000000000
--- a/models/experimental/functional_yolov11/test/test_ttnn_sppf.py
+++ /dev/null
@@ -1,54 +0,0 @@
-# SPDX-FileCopyrightText: © 2025 Tenstorrent Inc.
-# SPDX-License-Identifier: Apache-2.0
-
-import pytest
-import ttnn
-import torch
-from tests.ttnn.utils_for_testing import assert_with_pcc
-from models.experimental.functional_yolov11.tt.model_preprocessing import (
-    create_yolov11_input_tensors,
-    create_yolov11_model_parameters,
-)
-from models.experimental.functional_yolov11.reference.yolov11 import SPPF as torch_sppf
-from models.experimental.functional_yolov11.tt.ttnn_yolov11 import SPPF as ttnn_sppf
-
-
-@pytest.mark.parametrize(
-    "in_channel, out_channel, kernel, stride, padding, dilation, groups,fwd_input_shape",
-    [
-        ([256, 512], [128, 256], [1, 1], [1, 1], [0, 0], [1, 1], [1, 1], [1, 256, 20, 20]),
-    ],
-)
-@pytest.mark.parametrize("device_params", [{"l1_small_size": 79104}], indirect=True)
-def test_yolo_v11_sppf(
-    device,
-    use_program_cache,
-    reset_seeds,
-    in_channel,
-    out_channel,
-    kernel,
-    stride,
-    padding,
-    dilation,
-    groups,
-    fwd_input_shape,
-):
-    torch_module = torch_sppf(in_channel, out_channel, kernel, stride, padding, dilation, groups)
-    torch_module.eval()
-    torch_input, ttnn_input = create_yolov11_input_tensors(
-        device,
-        batch=fwd_input_shape[0],
-        input_channels=fwd_input_shape[1],
-        input_height=fwd_input_shape[2],
-        input_width=fwd_input_shape[3],
-    )
-    ttnn_input = ttnn.to_device(ttnn_input, device=device)
-    ttnn_input = ttnn.to_layout(ttnn_input, layout=ttnn.TILE_LAYOUT, memory_config=ttnn.L1_MEMORY_CONFIG)
-    torch_output = torch_module(torch_input)
-    parameters = create_yolov11_model_parameters(torch_module, torch_input, device=device)
-    ttnn_module = ttnn_sppf(device=device, parameter=parameters.conv_args, conv_pt=parameters)
-    ttnn_output = ttnn_module(x=ttnn_input, device=device)
-    ttnn_output = ttnn.to_torch(ttnn_output)
-    ttnn_output = ttnn_output.permute(0, 3, 1, 2)
-    ttnn_output = ttnn_output.reshape(torch_output.shape)
-    assert_with_pcc(torch_output, ttnn_output, 0.99999)
diff --git a/models/experimental/functional_yolov11/test/test_ttnn_yolov11.py b/models/experimental/functional_yolov11/test/test_ttnn_yolov11.py
deleted file mode 100644
index f808f36236a..00000000000
--- a/models/experimental/functional_yolov11/test/test_ttnn_yolov11.py
+++ /dev/null
@@ -1,85 +0,0 @@
-# SPDX-FileCopyrightText: © 2025 Tenstorrent Inc.
-
-# SPDX-License-Identifier: Apache-2.0
-
-import pytest
-import ttnn
-import torch
-from tests.ttnn.utils_for_testing import assert_with_pcc
-import sys
-
-from models.experimental.functional_yolov11.reference import yolov11
-
-from models.experimental.functional_yolov11.tt.model_preprocessing import (
-    create_yolov11_input_tensors,
-    create_yolov11_model_parameters,
-)
-from models.experimental.functional_yolov11.tt import ttnn_yolov11
-import torch.nn as nn
-
-try:
-    sys.modules["ultralytics"] = yolov11
-    sys.modules["ultralytics.nn.tasks"] = yolov11
-    sys.modules["ultralytics.nn.modules.conv"] = yolov11
-    sys.modules["ultralytics.nn.modules.block"] = yolov11
-    sys.modules["ultralytics.nn.modules.head"] = yolov11
-
-except KeyError:
-    print("models.experimental.functional_yolov11.reference.yolov11 not found.")
-
-
-class Ensemble(nn.ModuleList):
-    def __init__(self):
-        super(Ensemble, self).__init__()
-
-    def forward(self, x, augment=False):
-        y = []
-        for module in self:
-            y.append(module(x, augment)[0])
-        y = torch.cat(y, 1)
-        return y, None
-
-
-def attempt_load(weights, map_location=None):
-    model = Ensemble()
-    for w in weights if isinstance(weights, list) else [weights]:
-        w = "models/experimental/functional_yolov11/reference/yolo11n.pt"
-        ckpt = torch.load(w, map_location=map_location)
-        model.append(ckpt["ema" if ckpt.get("ema") else "model"].float().eval())
-    for m in model.modules():
-        if type(m) in [nn.Hardswish, nn.LeakyReLU, nn.ReLU, nn.ReLU6, nn.SiLU]:
-            m.inplace = True
-        elif type(m) is nn.Upsample:
-            m.recompute_scale_factor = None
-
-    if len(model) == 1:
-        return model[-1]
-    else:
-        for k in ["names", "stride"]:
-            setattr(model, k, getattr(model[-1], k))
-        return model
-
-
-@pytest.mark.parametrize("device_params", [{"l1_small_size": 79104}], indirect=True)
-def test_yolov11(device, use_program_cache, reset_seeds):
-    torch_input, ttnn_input = create_yolov11_input_tensors(device, input_channels=3, input_height=224, input_width=224)
-
-    torch_model = attempt_load("yolov11n.pt", map_location="cpu")
-    state_dict = torch_model.state_dict()
-    torch_model = yolov11.YoloV11()
-    ds_state_dict = {k: v for k, v in state_dict.items()}
-    new_state_dict = {}
-    for (name1, parameter1), (name2, parameter2) in zip(torch_model.state_dict().items(), ds_state_dict.items()):
-        if isinstance(parameter2, torch.FloatTensor):
-            new_state_dict[name1] = parameter2
-    torch_model.load_state_dict(new_state_dict)
-    torch_model.eval()
-
-    torch_output = torch_model(torch_input)
-    parameters = create_yolov11_model_parameters(torch_model, torch_input, device=device)
-    ttnn_model = ttnn_yolov11.YoloV11(device, parameters)
-    ttnn_output = ttnn_model(ttnn_input)
-    ttnn_output = ttnn.to_torch(ttnn_output)
-
-    ttnn_output = ttnn_output.reshape(torch_output.shape)
-    assert_with_pcc(torch_output, ttnn_output, 0.99999)
diff --git a/models/experimental/functional_yolov11/test/test_work_arounds_work.py b/models/experimental/functional_yolov11/test/test_work_arounds_work.py
deleted file mode 100644
index 1e8abd03813..00000000000
--- a/models/experimental/functional_yolov11/test/test_work_arounds_work.py
+++ /dev/null
@@ -1,110 +0,0 @@
-import torch, ttnn, pytest, torch.nn as nn
-from tests.ttnn.utils_for_testing import assert_with_pcc
-import ttnn
-
-
-@pytest.mark.parametrize("device_params", [{"l1_small_size": 79104}], indirect=True)
-def test_silu_alone(device, use_program_cache, reset_seeds):
-    torch_input_tensor = torch.randn(1, 32, 56, 56)
-    act = nn.SiLU(inplace=True)
-    torch_x = act(torch_input_tensor)
-
-    ttnn_input_tensor = torch.permute(torch_input_tensor, (0, 2, 3, 1))
-    ttnn_input_tensor = ttnn_input_tensor.reshape(
-        1,
-        1,
-        ttnn_input_tensor.shape[0] * ttnn_input_tensor.shape[1] * ttnn_input_tensor.shape[2],
-        ttnn_input_tensor.shape[3],
-    )
-    ttnn_x = ttnn.from_torch(
-        ttnn_input_tensor,
-        dtype=ttnn.bfloat16,
-        layout=ttnn.TILE_LAYOUT,
-        device=device,
-        memory_config=ttnn.L1_MEMORY_CONFIG,
-    )
-    ttnn_x = ttnn.silu(ttnn_x)
-    ttnn_x = ttnn.to_torch(ttnn_x).reshape(1, 56, 56, 32).permute(0, 3, 1, 2)
-
-    assert_with_pcc(torch_x, ttnn_x, 0.99999)
-
-
-@pytest.mark.parametrize("device_params", [{"l1_small_size": 79104}], indirect=True)
-def test_slice_alone(device, use_program_cache, reset_seeds):
-    torch_input_tensor = torch.randn(1, 64, 28, 28)
-    torch_y1, torch_y2 = torch_input_tensor.chunk(2, 1)
-    ttnn_input_tensor = torch.permute(torch_input_tensor, (0, 2, 3, 1))
-    ttnn_input_tensor = ttnn_input_tensor.reshape(
-        1,
-        1,
-        ttnn_input_tensor.shape[0] * ttnn_input_tensor.shape[1] * ttnn_input_tensor.shape[2],
-        ttnn_input_tensor.shape[3],
-    )
-    ttnn_x = ttnn.from_torch(ttnn_input_tensor, dtype=ttnn.bfloat16, layout=ttnn.TILE_LAYOUT, device=device)
-    ttnn_x = ttnn.to_layout(ttnn_x, ttnn.ROW_MAJOR_LAYOUT)
-    ttnn_y1 = ttnn_x[:, :, :, :32]
-    ttnn_y2 = ttnn_x[:, :, :, 32:64]
-    ttnn_y1 = ttnn.to_torch(ttnn_y1).reshape(1, 28, 28, 32).permute(0, 3, 1, 2)
-
-    assert_with_pcc(torch_y1, ttnn_y1, 0.99999)
-
-
-@pytest.mark.parametrize("device_params", [{"l1_small_size": 79104}], indirect=True)
-def test_silu_layout_issue(device, use_program_cache, reset_seeds):
-    torch_input_tensor = torch.randn(1, 64, 28, 28)
-    act = nn.SiLU(inplace=True)
-    torch_x = act(torch_input_tensor)
-    torch_y1, torch_y2 = torch_input_tensor.chunk(2, 1)
-
-    ttnn_input_tensor = torch.permute(torch_input_tensor, (0, 2, 3, 1))
-
-    ttnn_input_tensor = ttnn_input_tensor.reshape(
-        1,
-        1,
-        ttnn_input_tensor.shape[0] * ttnn_input_tensor.shape[1] * ttnn_input_tensor.shape[2],
-        ttnn_input_tensor.shape[3],
-    )
-    ttnn_x = ttnn.from_torch(ttnn_input_tensor, dtype=ttnn.bfloat16, layout=ttnn.TILE_LAYOUT, device=device)
-    ttnn_x = ttnn.silu(ttnn_x)
-    ttnn_x = ttnn.to_layout(ttnn_x, ttnn.ROW_MAJOR_LAYOUT)
-    ttnn_y1 = ttnn_x[:, :, :, :32]
-    ttnn_y2 = ttnn_x[:, :, :, 32:64]
-    # ttnn_x = ttnn.to_layout(ttnn_x, ttnn.ROW_MAJOR_LAYOUT)
-    # ttnn_x = ttnn.reshape(ttnn_x, (1, 28, 28, 64))
-
-    # ttnn_y1, ttnn_y2 = ttnn.split(ttnn_x, 2, 3)
-    ttnn_y1 = ttnn.to_torch(ttnn_y1).reshape(1, 28, 28, 32).permute(0, 3, 1, 2)
-
-    assert_with_pcc(torch_y1, ttnn_y1, 0.99999)
-
-
-@pytest.mark.parametrize("device_params", [{"l1_small_size": 79104}], indirect=True)
-def test_split_dumped(device, use_program_cache, reset_seeds):  # 0.005 for fp8 and 099 for fp16
-    torch_input_tensor = torch.randn(1, 64, 28, 28)
-    torch_y1, torch_y2 = torch_input_tensor.chunk(2, 1)
-
-    ttnn_input_tensor = torch.permute(torch_input_tensor, (0, 2, 3, 1))
-    ttnn_input_tensor = ttnn_input_tensor.reshape(
-        1,
-        1,
-        ttnn_input_tensor.shape[0] * ttnn_input_tensor.shape[1] * ttnn_input_tensor.shape[2],
-        ttnn_input_tensor.shape[3],
-    )
-    ttnn_x = ttnn.from_torch(ttnn_input_tensor, dtype=ttnn.bfloat8_b, layout=ttnn.TILE_LAYOUT, device=device)
-    ttnn_x = ttnn.to_layout(ttnn_x, ttnn.ROW_MAJOR_LAYOUT)
-    ttnn_x = ttnn.reshape(ttnn_x, (1, 28, 28, 64))
-    ttnn_y1 = ttnn_x[:, :, :, :32]
-    ttnn_y2 = ttnn_x[:, :, :, 32:64]
-
-    assert_with_pcc(torch_y1, ttnn.to_torch(ttnn_y1).reshape(1, 28, 28, 32).permute(0, 3, 1, 2), 0.99999)
-
-
-@pytest.mark.parametrize("device_params", [{"l1_small_size": 16384}], indirect=True)
-def test_dtype_issue(device):
-    a = torch.randn((1, 256, 1, 49), dtype=torch.bfloat16)
-    a_ttnn = ttnn.from_torch(a, layout=ttnn.TILE_LAYOUT, dtype=ttnn.bfloat8_b)
-    print("bfp8", a_ttnn.dtype)
-    a_ttnn = ttnn.to_layout(a_ttnn, layout=ttnn.TILE_LAYOUT, dtype=ttnn.bfloat16)
-    print("bfp16", a_ttnn.dtype)
-    ttnn_output = ttnn.to_torch(a_ttnn)
-    assert_with_pcc(a, ttnn_output, 0.99999)
diff --git a/models/experimental/functional_yolov11/test/test_yolov11_demo.py b/models/experimental/functional_yolov11/test/test_yolov11_demo.py
deleted file mode 100644
index 0ea487f6980..00000000000
--- a/models/experimental/functional_yolov11/test/test_yolov11_demo.py
+++ /dev/null
@@ -1,87 +0,0 @@
-# SPDX-FileCopyrightText: © 2025 Tenstorrent Inc.
-
-# SPDX-License-Identifier: Apache-2.0
-
-import pytest
-import ttnn
-import torch
-from tests.ttnn.utils_for_testing import assert_with_pcc
-import sys
-
-from models.experimental.functional_yolov11.reference import yolov11
-
-from models.experimental.functional_yolov11.tt.model_preprocessing import (
-    create_yolov11_input_tensors,
-    create_yolov11_model_parameters,
-)
-from models.experimental.functional_yolov11.tt import ttnn_yolov11
-import torch.nn as nn
-
-try:
-    sys.modules["ultralytics"] = yolov11
-    sys.modules["ultralytics.nn.tasks"] = yolov11
-    sys.modules["ultralytics.nn.modules.conv"] = yolov11
-    sys.modules["ultralytics.nn.modules.block"] = yolov11
-    sys.modules["ultralytics.nn.modules.head"] = yolov11
-
-except KeyError:
-    print("models.experimental.functional_yolov11.reference.yolov11 not found.")
-
-
-class Ensemble(nn.ModuleList):
-    def __init__(self):
-        super(Ensemble, self).__init__()
-
-    def forward(self, x, augment=False):
-        y = []
-        for module in self:
-            y.append(module(x, augment)[0])
-        y = torch.cat(y, 1)
-        return y, None
-
-
-def attempt_load(weights, map_location=None):
-    model = Ensemble()
-    for w in weights if isinstance(weights, list) else [weights]:
-        w = "models/experimental/functional_yolov11/reference/yolo11n.pt"
-        ckpt = torch.load(w, map_location=map_location)
-        model.append(ckpt["ema" if ckpt.get("ema") else "model"].float().eval())
-    for m in model.modules():
-        if type(m) in [nn.Hardswish, nn.LeakyReLU, nn.ReLU, nn.ReLU6, nn.SiLU]:
-            m.inplace = True
-        elif type(m) is nn.Upsample:
-            m.recompute_scale_factor = None
-
-    if len(model) == 1:
-        return model[-1]
-    else:
-        for k in ["names", "stride"]:
-            setattr(model, k, getattr(model[-1], k))
-        return model
-
-
-@pytest.mark.parametrize("device_params", [{"l1_small_size": 79104}], indirect=True)
-def test_yolov11(device, use_program_cache, reset_seeds):
-    torch_input, ttnn_input = create_yolov11_input_tensors(device)
-
-    torch_model = attempt_load("yolov11n.pt", map_location="cpu")
-    state_dict = torch_model.state_dict()
-    torch_model = yolov11.YoloV11()
-    ds_state_dict = {k: v for k, v in state_dict.items()}
-    new_state_dict = {}
-    for (name1, parameter1), (name2, parameter2) in zip(torch_model.state_dict().items(), ds_state_dict.items()):
-        if isinstance(parameter2, torch.FloatTensor):
-            new_state_dict[name1] = parameter2
-    torch_model.load_state_dict(new_state_dict)
-    torch_model.eval()
-
-    torch_output = torch_model(torch_input)
-
-    parameters = create_yolov11_model_parameters(torch_model, torch_input, device=device)
-    ttnn_model = ttnn_yolov11.YoloV11(device, parameters)
-    ttnn_output = ttnn_model(ttnn_input)
-
-    ttnn_output = ttnn.to_torch(ttnn_output)
-    ttnn_output = ttnn_output.permute(0, 3, 1, 2)
-    ttnn_output = ttnn_output.reshape(torch_output.shape)
-    assert_with_pcc(torch_output, ttnn_output, 0.99999)
diff --git a/models/experimental/functional_yolov11/tests/test_yolov11.py b/models/experimental/functional_yolov11/tests/test_yolov11.py
new file mode 100644
index 00000000000..6bd1eeb76dd
--- /dev/null
+++ b/models/experimental/functional_yolov11/tests/test_yolov11.py
@@ -0,0 +1,125 @@
+# SPDX-FileCopyrightText: © 2025 Tenstorrent Inc.
+
+# SPDX-License-Identifier: Apache-2.0
+
+import sys
+import ttnn
+import time
+import torch
+import pytest
+import torch.nn as nn
+from loguru import logger
+from models.utility_functions import is_wormhole_b0
+from models.perf.perf_utils import prep_perf_report
+from models.experimental.functional_yolov11.tt import ttnn_yolov11
+from models.experimental.functional_yolov11.reference import yolov11
+from models.experimental.functional_yolov11.reference.yolov11 import attempt_load
+from models.utility_functions import enable_persistent_kernel_cache, disable_persistent_kernel_cache
+from models.perf.device_perf_utils import run_device_perf, check_device_perf, prep_device_perf_report
+from models.experimental.functional_yolov11.tt.model_preprocessing import (
+    create_yolov11_input_tensors,
+    create_yolov11_model_parameters,
+)
+
+# Alias the ultralytics module paths to the local reference module (presumably needed to unpickle the checkpoint).
+for _ultralytics_alias in (
+    "ultralytics",
+    "ultralytics.nn.tasks",
+    "ultralytics.nn.modules.conv",
+    "ultralytics.nn.modules.block",
+    "ultralytics.nn.modules.head",
+):
+    sys.modules[_ultralytics_alias] = yolov11
+
+
+def get_expected_times(name):
+    """Return the expected (compile time, inference time) pair for model `name`; unknown names raise KeyError."""
+    return {"yolov11": (130.70, 0.594)}[name]
+
+
+@pytest.mark.models_performance_bare_metal
+@pytest.mark.parametrize("device_params", [{"l1_small_size": 32768}], indirect=True)
+@pytest.mark.parametrize("batch_size", [1])
+@pytest.mark.parametrize("input_tensor", [torch.rand((1, 3, 640, 640))], ids=["input_tensor"])
+def test_yolov11(device, input_tensor, batch_size):
+    """Time a cold (compile + inference) run and a warm run of the TTNN YoloV11 model and report both."""
+    disable_persistent_kernel_cache()
+    torch_input, ttnn_input = create_yolov11_input_tensors(
+        device,
+        batch=input_tensor.shape[0],
+        input_channels=input_tensor.shape[1],
+        input_height=input_tensor.shape[2],
+        input_width=input_tensor.shape[3],
+    )
+    checkpoint_state = attempt_load("yolo11n.pt", map_location="cpu").state_dict()
+    torch_model = yolov11.YoloV11()
+    # Checkpoint weights are mapped onto the reference model by position; only torch.FloatTensor entries are kept.
+    new_state_dict = {}
+    for target_name, source_param in zip(torch_model.state_dict(), checkpoint_state.values()):
+        if isinstance(source_param, torch.FloatTensor):
+            new_state_dict[target_name] = source_param
+    torch_model.load_state_dict(new_state_dict)
+    torch_model.eval()
+    parameters = create_yolov11_model_parameters(torch_model, torch_input, device=device)
+    model = ttnn_yolov11.YoloV11(device, parameters)
+    durations = []
+
+    # The first iteration is reported as compile + inference, the second as inference only.
+    for _ in range(2):
+        start = time.time()
+        ttnn_model_output = model(ttnn_input)
+        end = time.time()
+        durations.append(end - start)
+        ttnn.deallocate(ttnn_model_output)
+        enable_persistent_kernel_cache()
+
+    inference_and_compile_time, inference_time = durations
+    expected_compile_time, expected_inference_time = get_expected_times("yolov11")
+
+    prep_perf_report(
+        model_name="models/experimental/functional_yolov11",
+        batch_size=batch_size,
+        inference_and_compile_time=inference_and_compile_time,
+        inference_time=inference_time,
+        expected_compile_time=expected_compile_time,
+        expected_inference_time=expected_inference_time,
+        comments="",
+        inference_time_cpu=0.0,
+    )
+
+    logger.info(f"Compile time: {inference_and_compile_time - inference_time}")
+    logger.info(f"Inference time: {inference_time}")
+    logger.info(f"Samples per second: {1 / inference_time * batch_size}")
+
+
+@pytest.mark.parametrize(
+    "batch_size, expected_perf",
+    [
+        [1, 81.94],
+    ],
+)
+@pytest.mark.models_device_performance_bare_metal
+def test_perf_device_bare_metal_yolov11(batch_size, expected_perf):
+    """Run the demo test via run_device_perf and check AVG DEVICE KERNEL SAMPLES/S against expected_perf."""
+    subdir = "ttnn_yolov11"
+    num_iterations = 1
+    margin = 0.03
+    expected_perf = expected_perf if is_wormhole_b0() else 0
+
+    command = "pytest models/experimental/functional_yolov11/demo/demo.py::test_demo"
+    cols = ["DEVICE FW", "DEVICE KERNEL", "DEVICE BRISC KERNEL"]
+    perf_key = "AVG DEVICE KERNEL SAMPLES/S"
+    expected_perf_cols = {perf_key: expected_perf}
+
+    post_processed_results = run_device_perf(command, subdir, num_iterations, cols, batch_size)
+    expected_results = check_device_perf(post_processed_results, margin, expected_perf_cols)
+
+    logger.info(f"{expected_results}")
+
+    prep_device_perf_report(
+        model_name=f"ttnn_functional_yolov11{batch_size}",
+        batch_size=batch_size,
+        post_processed_results=post_processed_results,
+        expected_results=expected_results,
+        comments="",
+    )
diff --git a/models/experimental/functional_yolov11/test/test_yolov11_perfomant.py b/models/experimental/functional_yolov11/tests/test_yolov11_perfomant.py
similarity index 91%
rename from models/experimental/functional_yolov11/test/test_yolov11_perfomant.py
rename to models/experimental/functional_yolov11/tests/test_yolov11_perfomant.py
index de99d2d28f6..10658d8ee58 100644
--- a/models/experimental/functional_yolov11/test/test_yolov11_perfomant.py
+++ b/models/experimental/functional_yolov11/tests/test_yolov11_perfomant.py
@@ -3,11 +3,9 @@
 # SPDX-License-Identifier: Apache-2.0
 
 import pytest
-import ttnn
-import torch
 from tests.ttnn.utils_for_testing import assert_with_pcc
 from models.utility_functions import run_for_wormhole_b0
-from models.experimental.functional_yolov11.test.yolov11_perfomant import (
+from models.experimental.functional_yolov11.tests.yolov11_perfomant import (
     run_yolov11_trace_inference,
     run_yolov11_trace_2cqs_inference,
 )
diff --git a/models/experimental/functional_yolov11/test/yolov11_perfomant.py b/models/experimental/functional_yolov11/tests/yolov11_perfomant.py
similarity index 96%
rename from models/experimental/functional_yolov11/test/yolov11_perfomant.py
rename to models/experimental/functional_yolov11/tests/yolov11_perfomant.py
index 3ed329902fc..fd119146196 100644
--- a/models/experimental/functional_yolov11/test/yolov11_perfomant.py
+++ b/models/experimental/functional_yolov11/tests/yolov11_perfomant.py
@@ -1,4 +1,4 @@
-# SPDX-FileCopyrightText: © 2024 Tenstorrent Inc.
+# SPDX-FileCopyrightText: © 2025 Tenstorrent Inc.
 
 # SPDX-License-Identifier: Apache-2.0
 
@@ -6,7 +6,7 @@
 import torch
 import ttnn
 from models.utility_functions import is_wormhole_b0, profiler
-from models.experimental.functional_yolov11.test.yolov11_test_infra import create_test_infra
+from models.experimental.functional_yolov11.tests.yolov11_test_infra import create_test_infra
 
 try:
     from tracy import signpost
@@ -56,7 +56,6 @@ def run_yolov11_trace_inference(
     trace_input_addr = ttnn.buffer_address(test_infra.input_tensor)
     tid = ttnn.begin_trace_capture(device, cq_id=0)
     test_infra.run()
-    print("run3")
     tt_image_res = ttnn.allocate_tensor_on_device(spec, device)
     ttnn.end_trace_capture(device, tid, cq_id=0)
     assert trace_input_addr == ttnn.buffer_address(tt_image_res)
diff --git a/models/experimental/functional_yolov11/test/yolov11_test_infra.py b/models/experimental/functional_yolov11/tests/yolov11_test_infra.py
similarity index 79%
rename from models/experimental/functional_yolov11/test/yolov11_test_infra.py
rename to models/experimental/functional_yolov11/tests/yolov11_test_infra.py
index 71fc4e50019..09edeebc7c9 100644
--- a/models/experimental/functional_yolov11/test/yolov11_test_infra.py
+++ b/models/experimental/functional_yolov11/tests/yolov11_test_infra.py
@@ -1,17 +1,14 @@
-# SPDX-FileCopyrightText: © 2024 Tenstorrent Inc.
+# SPDX-FileCopyrightText: © 2025 Tenstorrent Inc.
 
 # SPDX-License-Identifier: Apache-2.0
 
 from loguru import logger
-import os
-import pytest
 import torch
-import torchvision
 from tests.ttnn.utils_for_testing import assert_with_pcc
 import ttnn
 from models.experimental.functional_yolov11.reference import yolov11
-from models.experimental.functional_yolov11.reference.yolov11 import YoloV11 as torch_yolov11
-from models.experimental.functional_yolov11.tt.ttnn_yolov11 import YoloV11 as ttnn_yolov11
+from models.experimental.functional_yolov11.tt import ttnn_yolov11
+from models.experimental.functional_yolov11.reference.yolov11 import attempt_load
 import sys
 from models.utility_functions import (
     is_wormhole_b0,
@@ -35,38 +32,6 @@
     print("models.experimental.functional_yolov11.reference.yolov11 not found.")
 
 
-class Ensemble(nn.ModuleList):
-    def __init__(self):
-        super(Ensemble, self).__init__()
-
-    def forward(self, x, augment=False):
-        y = []
-        for module in self:
-            y.append(module(x, augment)[0])
-        y = torch.cat(y, 1)
-        return y, None
-
-
-def attempt_load(weights, map_location=None):
-    model = Ensemble()
-    for w in weights if isinstance(weights, list) else [weights]:
-        w = "models/experimental/functional_yolov11/reference/yolo11n.pt"
-        ckpt = torch.load(w, map_location=map_location)
-        model.append(ckpt["ema" if ckpt.get("ema") else "model"].float().eval())
-    for m in model.modules():
-        if type(m) in [nn.Hardswish, nn.LeakyReLU, nn.ReLU, nn.ReLU6, nn.SiLU]:
-            m.inplace = True
-        elif type(m) is nn.Upsample:
-            m.recompute_scale_factor = None
-
-    if len(model) == 1:
-        return model[-1]
-    else:
-        for k in ["names", "stride"]:
-            setattr(model, k, getattr(model[-1], k))
-        return model
-
-
 def load_yolov11_model():
     torch_model = attempt_load("yolov11n.pt", map_location="cpu")
     state_dict = torch_model.state_dict()
@@ -97,7 +62,7 @@ def __init__(
         torch_model = load_yolov11_model()
         parameters = create_yolov11_model_parameters(torch_model, self.torch_input, device=device)
         self.torch_output = torch_model(self.torch_input)
-        self.ttnn_yolov11_model = ttnn_yolov11(device, parameters)
+        self.ttnn_yolov11_model = ttnn_yolov11.YoloV11(device, parameters)
 
     def run(self):
         self.output_tensor = self.ttnn_yolov11_model(self.input_tensor)
@@ -150,7 +115,6 @@ def setup_dram_sharded_input(self, device, torch_input_tensor=None, mesh_mapper=
     def validate(self, output_tensor=None):
         output_tensor = self.output_tensor if output_tensor is None else output_tensor
         output_tensor = ttnn.to_torch(self.output_tensor)
-        # output_tensor = torch.permute(output_tensor, (0, 3, 1, 2))
         output_tensor = output_tensor.reshape((self.torch_output).shape)
 
         valid_pcc = 0.98
diff --git a/models/experimental/functional_yolov11/tt/model_preprocessing.py b/models/experimental/functional_yolov11/tt/model_preprocessing.py
index 6a042b9f12a..5ac679d9900 100644
--- a/models/experimental/functional_yolov11/tt/model_preprocessing.py
+++ b/models/experimental/functional_yolov11/tt/model_preprocessing.py
@@ -1,5 +1,6 @@
 # SPDX-FileCopyrightText: © 2025 Tenstorrent Inc.
-# SPDX-License-Identifier: Apache-2.0]
+
+# SPDX-License-Identifier: Apache-2.0
 
 import torch
 import ttnn
@@ -11,7 +12,6 @@
 
 
 def create_yolov11_input_tensors(device, batch=1, input_channels=3, input_height=224, input_width=224):
-    # torch.manual_seed(20)
     torch_input_tensor = torch.randn(batch, input_channels, input_height, input_width)
     ttnn_input_tensor = torch.permute(torch_input_tensor, (0, 2, 3, 1))
     ttnn_input_tensor = ttnn_input_tensor.reshape(
@@ -39,12 +39,8 @@ def make_anchors(device, feats, strides, grid_cell_offset=0.5):
     b = torch.cat(stride_tensor).transpose(0, 1)
 
     return (
-        ttnn.from_torch(
-            a, dtype=ttnn.bfloat16, layout=ttnn.TILE_LAYOUT, device=device, memory_config=ttnn.L1_MEMORY_CONFIG
-        ),
-        ttnn.from_torch(
-            b, dtype=ttnn.bfloat16, layout=ttnn.TILE_LAYOUT, device=device, memory_config=ttnn.L1_MEMORY_CONFIG
-        ),
+        ttnn.from_torch(a, dtype=ttnn.bfloat16, layout=ttnn.TILE_LAYOUT, device=device),
+        ttnn.from_torch(b, dtype=ttnn.bfloat16, layout=ttnn.TILE_LAYOUT, device=device),
     )
 
 
@@ -97,17 +93,6 @@ def __delattr__(self, key):
 
 
 def preprocess(d: dict, weights_path: str, bias_path: str):
-    """
-    Accesses a tensor within a nested dictionary using a path string.
-
-    Args:
-        d: The dictionary containing the nested structure.
-        path: A string representing the path to the tensor,
-              e.g., "conv1.module.conv.weight".
-
-    Returns:
-        The tensor found at the specified path, or None if not found.
-    """
     tt_bias = None
     weight_keys = weights_path.split(".")
     bias_keys = bias_path.split(".")
@@ -162,7 +147,7 @@ def create_yolov11_model_parameters(model: YoloV11, input_tensor: torch.Tensor,
         input_tensor.shape[3] // 8,
         input_tensor.shape[3] // 16,
         input_tensor.shape[3] // 32,
-    ]  # Values depends on input resolution. Current: 224x224
+    ]
     strides = [8.0, 16.0, 32.0]
 
     anchors, strides = make_anchors(device, feats, strides)  # Optimization: Processing make anchors outside model run
@@ -187,7 +172,7 @@ def create_yolov11_model_parameters_detect(
         model=model, run_model=lambda model: model(input_tensor_1, input_tensor_2, input_tensor_3), device=None
     )
 
-    feats = [28, 14, 7]  # Values depends on input resolution. Current: 224x224
+    feats = [28, 14, 7]
     strides = [8.0, 16.0, 32.0]
 
     anchors, strides = make_anchors(device, feats, strides)  # Optimization: Processing make anchors outside model run
diff --git a/models/experimental/functional_yolov11/tt/ttnn_yolov11.py b/models/experimental/functional_yolov11/tt/ttnn_yolov11.py
index ceabf111b65..72ad8b6b944 100644
--- a/models/experimental/functional_yolov11/tt/ttnn_yolov11.py
+++ b/models/experimental/functional_yolov11/tt/ttnn_yolov11.py
@@ -1,12 +1,13 @@
 # SPDX-FileCopyrightText: © 2025 Tenstorrent Inc.
+
 # SPDX-License-Identifier: Apache-2.0
+
 import ttnn
 import math
 from tt_lib.utils import (
     _nearest_y,
 )
 from tests.ttnn.ttnn_utility_fuction import get_shard_grid_from_num_cores
-import torch
 
 
 class Yolov11_Conv2D:
@@ -107,56 +108,6 @@ def __call__(self, x):
         return x
 
 
-def Yolov11_shard_SiLU(device, x, ncores=64):
-    input_2d_height = x.shape.with_tile_padding()[2]
-    input_2d_width = x.shape.with_tile_padding()[3]
-
-    input_2d_height_padded = _nearest_y(input_2d_height, ncores * 32)
-
-    shard_height = math.ceil(input_2d_height_padded / ncores)
-    shard_grid = get_shard_grid_from_num_cores(ncores, device)
-    shard_width = input_2d_width
-    shard_orientation = ttnn.ShardOrientation.ROW_MAJOR
-    tensor_memory_layout = ttnn.TensorMemoryLayout.HEIGHT_SHARDED
-
-    shard_spec = ttnn.ShardSpec(shard_grid, (shard_height, shard_width), shard_orientation, False)
-
-    in_sharded_mem_config = ttnn.MemoryConfig(tensor_memory_layout, ttnn.BufferType.L1, shard_spec)
-
-    x = ttnn.to_memory_config(x, memory_config=in_sharded_mem_config)
-
-    x = ttnn.silu(x, memory_config=in_sharded_mem_config)
-    return x
-
-
-def Yolov11_shard_upsample(device, x):
-    shard_grid = ttnn.CoreRangeSet(
-        {
-            ttnn.CoreRange(
-                ttnn.CoreCoord(0, 0),
-                ttnn.CoreCoord(7, 5),
-            ),
-            ttnn.CoreRange(
-                ttnn.CoreCoord(0, 6),
-                ttnn.CoreCoord(0, 6),
-            ),
-        }
-    )
-    shard_height = math.ceil(x.shape[0] * x.shape[1] * x.shape[2] / 49)
-    shard_width = x.shape[-1]
-    shard_spec = ttnn.ShardSpec(shard_grid, (shard_height, shard_width), ttnn.ShardOrientation.ROW_MAJOR)
-    in_sharded_mem_config = ttnn.MemoryConfig(ttnn.TensorMemoryLayout.HEIGHT_SHARDED, ttnn.BufferType.L1, shard_spec)
-    x = ttnn.to_memory_config(x, memory_config=in_sharded_mem_config)
-    shard_height_out = shard_height * 2 * 2  # scale_factor=2
-    shard_spec_out = ttnn.ShardSpec(shard_grid, (shard_height_out, shard_width), ttnn.ShardOrientation.ROW_MAJOR)
-    out_sharded_mem_config = ttnn.MemoryConfig(
-        ttnn.TensorMemoryLayout.HEIGHT_SHARDED, ttnn.types.BufferType.L1, shard_spec_out
-    )
-    x = ttnn.upsample(x, scale_factor=2, memory_config=out_sharded_mem_config)
-    x = ttnn.sharded_to_interleaved(x, memory_config=ttnn.L1_MEMORY_CONFIG)
-    return x
-
-
 def sharded_concat(input_tensors, num_cores=64, dim=3):  # expected input tensors to be in fp16, RM, same (h*w)
     shard_grid = ttnn.CoreRangeSet({ttnn.CoreRange(ttnn.CoreCoord(0, 0), ttnn.CoreCoord(7, 7))})
     in_shard_width = input_tensors[0].shape[-1]
@@ -283,7 +234,7 @@ def __call__(self, device, x):
 
         k1 = self.k1(device, x1)
         k2 = self.k2(device, k1)
-        use_shard_concat = False  # fps drop due to layout conversion
+        use_shard_concat = False
         if use_shard_concat:
             x2 = ttnn.to_layout(x2, ttnn.ROW_MAJOR_LAYOUT)
             x2 = ttnn.to_dtype(x2, ttnn.bfloat16)
@@ -537,9 +488,6 @@ def __call__(self, device, y1, y2, y3):
         y2 = ttnn.concat((x2, x5), -1, memory_config=ttnn.L1_MEMORY_CONFIG)
         y3 = ttnn.concat((x3, x6), -1, memory_config=ttnn.L1_MEMORY_CONFIG)
 
-        y1_reshaped = ttnn.reshape(y1, (y1.shape[0], y1.shape[2], y1.shape[-1]))
-        y2_reshaped = ttnn.reshape(y2, (y2.shape[0], y2.shape[2], y2.shape[-1]))
-        y3_reshaped = ttnn.reshape(y3, (y3.shape[0], y3.shape[2], y3.shape[-1]))
         y = ttnn.concat((y1, y2, y3), dim=2, memory_config=ttnn.L1_MEMORY_CONFIG)
         y = ttnn.squeeze(y, dim=0)
         ya, yb = y[:, :, :64], y[:, :, 64:144]
@@ -561,7 +509,6 @@ def __call__(self, device, y1, y2, y3):
         c = self.dfl(ya)
         ttnn.deallocate(ya)
         c = ttnn.sharded_to_interleaved(c, memory_config=ttnn.L1_MEMORY_CONFIG)
-
         c = ttnn.to_layout(c, layout=ttnn.ROW_MAJOR_LAYOUT)
         c = ttnn.permute(c, (0, 3, 1, 2))
         c = ttnn.reshape(c, (c.shape[0], 1, 4, int(c.shape[3] / 4)))
@@ -569,6 +516,8 @@ def __call__(self, device, y1, y2, y3):
         c1, c2 = c[:, :2, :], c[:, 2:4, :]
 
         anchor, strides = self.anchors, self.strides
+        anchor = ttnn.to_memory_config(anchor, memory_config=ttnn.L1_MEMORY_CONFIG)
+        strides = ttnn.to_memory_config(strides, memory_config=ttnn.L1_MEMORY_CONFIG)
         c1 = ttnn.to_layout(c1, layout=ttnn.TILE_LAYOUT)
         c2 = ttnn.to_layout(c2, layout=ttnn.TILE_LAYOUT)
 
@@ -659,7 +608,6 @@ def __call__(self, x):
         x = self.c2psa(self.device, x)
         x10 = x
         x = ttnn.to_layout(x, layout=ttnn.ROW_MAJOR_LAYOUT)
-        # x = ttnn.to_dtype(x, ttnn.bfloat16)
         x = ttnn.reshape(x, (x.shape[0], int(math.sqrt(x.shape[2])), int(math.sqrt(x.shape[2])), x.shape[3]))
         nhw = x.shape[0] * x.shape[1] * x.shape[2]
         num_cores = determine_num_cores_for_upsample(nhw, x.shape[2])
@@ -671,17 +619,12 @@ def __call__(self, x):
             x = ttnn.reshard(x, shardspec)
         else:
             x = ttnn.interleaved_to_sharded(x, shardspec)
-
         x = ttnn.upsample(x, scale_factor=2, memory_config=x.memory_config())  # 11
         if x.is_sharded():
             x = ttnn.sharded_to_interleaved(x, memory_config=ttnn.L1_MEMORY_CONFIG)
         x = ttnn.reshape(x, (1, 1, x.shape[0] * x.shape[1] * x.shape[2], x.shape[3]))
         x6 = ttnn.to_layout(x6, layout=ttnn.ROW_MAJOR_LAYOUT)
-
-        # x = sharded_concat([x,x6]) # unequal channels( sharded_concat is not applicable)
         shard_height = (x[0].shape[2] + 64 - 1) // 64
-        print("shard height is ", shard_height)
-        print("x and x6 sahpes are", x.shape, x6.shape)
         input_sharded_memory_config_1 = ttnn.create_sharded_memory_config(
             (shard_height, x.shape[-1]),
             core_grid=ttnn.CoreRangeSet({ttnn.CoreRange(ttnn.CoreCoord(0, 0), ttnn.CoreCoord(7, 7))}),
@@ -694,22 +637,19 @@ def __call__(self, x):
             strategy=ttnn.ShardStrategy.HEIGHT,
             use_height_and_width_as_shard_shape=True,
         )
-        # x = ttnn.to_memory_config(x,input_sharded_memory_config_1)
-        # x6 = ttnn.to_memory_config(x6,input_sharded_memory_config_2)
+        x = ttnn.to_memory_config(x, input_sharded_memory_config_1)
+        x6 = ttnn.to_memory_config(x6, input_sharded_memory_config_2)
         out_sharded_memory_config_ = ttnn.create_sharded_memory_config(
             (shard_height, x.shape[-1] + x6.shape[-1]),
             core_grid=ttnn.CoreRangeSet({ttnn.CoreRange(ttnn.CoreCoord(0, 0), ttnn.CoreCoord(7, 7))}),
             strategy=ttnn.ShardStrategy.HEIGHT,
             use_height_and_width_as_shard_shape=True,
         )
-        x = ttnn.concat((x, x6), -1, memory_config=ttnn.L1_MEMORY_CONFIG)
+        x = ttnn.concat((x, x6), -1, memory_config=out_sharded_memory_config_)
 
         ttnn.deallocate(x6)
-        # if x.shape[2]==196:
-        #     x = ttnn.sharded_to_interleaved(x, memory_config=ttnn.L1_MEMORY_CONFIG)
-        #     x = ttnn.to_layout(x, layout=ttnn.TILE_LAYOUT)
-        print(" after x and x6 concat", x.shape)
-        # return x
+        if x.shape[2] == 196:
+            x = ttnn.sharded_to_interleaved(x, memory_config=ttnn.L1_MEMORY_CONFIG)
         x = self.c3k2_5(self.device, x)  # 13
         x13 = x
         x = ttnn.to_layout(x, layout=ttnn.ROW_MAJOR_LAYOUT)
@@ -730,26 +670,16 @@ def __call__(self, x):
         x = ttnn.reshape(x, (1, 1, x.shape[0] * x.shape[1] * x.shape[2], x.shape[3]))
         x4 = ttnn.to_layout(x4, layout=ttnn.ROW_MAJOR_LAYOUT)
         x = sharded_concat([x, x4])
-        # x = ttnn.concat((x, x4), -1, memory_config=ttnn.L1_MEMORY_CONFIG)  # 15
         ttnn.deallocate(x4)
-        # x = ttnn.to_layout(x, layout=ttnn.TILE_LAYOUT)
         x = self.c3k2_6(self.device, x)  # 16
         x16 = x
         x = self.conv7(self.device, x)  # 17
-        # x = ttnn.to_layout(x, layout=ttnn.ROW_MAJOR_LAYOUT)
-        # x = ttnn.to_dtype(x, ttnn.bfloat16)
-        # x = ttnn.to_layout(x, layout=ttnn.TILE_LAYOUT)
-        # print("x and x13 shapes are", x.shape, x13.shape, x.dtype, x13.dtype, x.layout, x13.layout)
         x = ttnn.concat((x, x13), -1, memory_config=ttnn.L1_MEMORY_CONFIG)  # 18
         ttnn.deallocate(x13)
         x = self.c3k2_7(self.device, x)  # 19
         x19 = x
-        x = self.conv8(self.device, x)  # 20 #16
-        # x = ttnn.to_layout(x, layout=ttnn.ROW_MAJOR_LAYOUT)
-        # x = ttnn.to_dtype(x, ttnn.bfloat16)
-        print("x and x10 shapes are", x.shape, x10.shape, x.dtype, x10.dtype, x.layout, x10.layout)
+        x = self.conv8(self.device, x)
         x = ttnn.concat((x, x10), -1, memory_config=ttnn.L1_MEMORY_CONFIG)  # 21
-        print("output cncat shape is", x.shape)
         ttnn.deallocate(x10)
         x = self.c3k2_8(self.device, x)  # 22
         x22 = x
diff --git a/tests/scripts/run_performance.sh b/tests/scripts/run_performance.sh
index bd19f0a398f..326312531e2 100755
--- a/tests/scripts/run_performance.sh
+++ b/tests/scripts/run_performance.sh
@@ -23,6 +23,9 @@ run_perf_models_other() {
         env WH_ARCH_YAML=wormhole_b0_80_arch_eth_dispatch.yaml pytest models/demos/yolov4/tests/test_perf_yolo.py -m $test_marker
 
         env WH_ARCH_YAML=wormhole_b0_80_arch_eth_dispatch.yaml pytest models/demos/wormhole/distilbert/tests/test_perf_distilbert.py -m $test_marker
+
+        #yolov11
+        env WH_ARCH_YAML=wormhole_b0_80_arch_eth_dispatch.yaml pytest models/experimental/functional_yolov11/tests/test_yolov11.py -m $test_marker
     fi
 
     env pytest -n auto tests/ttnn/integration_tests/bert/test_performance.py -m $test_marker
@@ -103,6 +106,7 @@ run_device_perf_models() {
 
     env pytest models/demos/roberta/tests/ -m $test_marker
 
+
     if [ "$tt_arch" == "grayskull" ]; then
         #TODO(MO): Until #6560 is fixed, GS device profiler test are grouped with
         #Model Device perf regression tests to make sure thy run on no-soft-reset BMs
@@ -133,6 +137,8 @@ run_device_perf_models() {
         env WH_ARCH_YAML=wormhole_b0_80_arch_eth_dispatch.yaml pytest models/demos/yolov4/tests/ -m $test_marker
 
         env WH_ARCH_YAML=wormhole_b0_80_arch_eth_dispatch.yaml pytest models/demos/wormhole/distilbert/tests -m $test_marker
+
+        env WH_ARCH_YAML=wormhole_b0_80_arch_eth_dispatch.yaml pytest models/experimental/functional_yolov11/tests/ -m $test_marker
     fi
 
     ## Merge all the generated reports
diff --git a/tests/scripts/run_python_model_tests.sh b/tests/scripts/run_python_model_tests.sh
index 576ef139fc7..0506dfbb2a8 100755
--- a/tests/scripts/run_python_model_tests.sh
+++ b/tests/scripts/run_python_model_tests.sh
@@ -43,6 +43,9 @@ run_python_model_tests_wormhole_b0() {
     # Mamba
     WH_ARCH_YAML=wormhole_b0_80_arch_eth_dispatch.yaml pytest -svv models/demos/wormhole/mamba/tests/test_residual_block.py -k "pretrained_weight_false"
 
+    # Yolov11
+    WH_ARCH_YAML=wormhole_b0_80_arch_eth_dispatch.yaml pytest tests/ttnn/integration_tests/yolov11/test_ttnn_yolo_v11.py
+
     # Llama3.1-8B
     llama8b=/mnt/MLPerf/tt_dnn-models/llama/Meta-Llama-3.1-8B-Instruct/
     # Llama3.2-1B
diff --git a/tests/scripts/single_card/run_single_card_demo_tests.sh b/tests/scripts/single_card/run_single_card_demo_tests.sh
index e7a8e492122..e85fe1044f5 100755
--- a/tests/scripts/single_card/run_single_card_demo_tests.sh
+++ b/tests/scripts/single_card/run_single_card_demo_tests.sh
@@ -61,6 +61,9 @@ run_common_func_tests() {
   #RoBERTa
   pytest --disable-warnings models/demos/roberta/demo/demo.py --timeout 600; fail+=$?
 
+  # Yolov11
+  WH_ARCH_YAML=wormhole_b0_80_arch_eth_dispatch.yaml pytest --disable-warnings models/experimental/functional_yolov11/demo/demo.py --timeout 600; fail+=$?
+
   return $fail
 }
 
diff --git a/tests/ttnn/integration_tests/yolov11/test_ttnn_bottleneck.py b/tests/ttnn/integration_tests/yolov11/test_ttnn_bottleneck.py
deleted file mode 100644
index 3aee44b8e84..00000000000
--- a/tests/ttnn/integration_tests/yolov11/test_ttnn_bottleneck.py
+++ /dev/null
@@ -1,59 +0,0 @@
-# SPDX-FileCopyrightText: © 2025 Tenstorrent Inc.
-# SPDX-License-Identifier: Apache-2.0
-import pytest
-import ttnn
-from tests.ttnn.utils_for_testing import assert_with_pcc
-from models.experimental.functional_yolov11.tt.model_preprocessing import (
-    create_yolov11_input_tensors,
-    create_yolov11_model_parameters,
-)
-from models.experimental.functional_yolov11.reference.yolov11 import Bottleneck as torch_bottleneck
-from models.experimental.functional_yolov11.tt.ttnn_yolov11 import Bottleneck as ttnn_bottleneck
-
-
-@pytest.mark.parametrize(
-    "in_channel, out_channel, kernel, stride, padding, dilation, groups,fwd_input_shape",
-    [
-        ([16, 8], [8, 16], [3, 3], [1, 1], [1, 1], [1, 1], [1, 1], [1, 16, 56, 56]),  # 1
-        ([32, 16], [16, 32], [3, 3], [1, 1], [1, 1], [1, 1], [1, 1], [1, 32, 38, 28]),  # 2
-        ([32, 32], [32, 32], [3, 3], [1, 1], [1, 1], [1, 1], [1, 1], [1, 32, 14, 14]),  # 3
-        ([64, 64], [64, 64], [3, 3], [1, 1], [1, 1], [1, 1], [1, 1], [1, 64, 7, 7]),
-        ([64, 32], [32, 64], [3, 3], [1, 1], [1, 1], [1, 1], [1, 1], [1, 64, 14, 14]),
-        ([32, 16], [16, 32], [3, 3], [1, 1], [1, 1], [1, 1], [1, 1], [1, 32, 28, 28]),
-        ([64, 32], [32, 64], [3, 3], [1, 1], [1, 1], [1, 1], [1, 1], [1, 64, 14, 14]),
-        ([64, 64], [64, 64], [3, 3], [1, 1], [1, 1], [1, 1], [1, 1], [1, 64, 7, 7]),
-    ],
-)
-@pytest.mark.parametrize("device_params", [{"l1_small_size": 79104}], indirect=True)
-def test_yolo_v11_bottleneck(
-    device,
-    use_program_cache,
-    reset_seeds,
-    in_channel,
-    out_channel,
-    kernel,
-    stride,
-    padding,
-    dilation,
-    groups,
-    fwd_input_shape,
-):
-    torch_module = torch_bottleneck(in_channel, out_channel, kernel, stride, padding, dilation, groups)
-    torch_module.eval()
-    torch_input, ttnn_input = create_yolov11_input_tensors(
-        device,
-        batch=fwd_input_shape[0],
-        input_channels=fwd_input_shape[1],
-        input_height=fwd_input_shape[2],
-        input_width=fwd_input_shape[3],
-    )
-    ttnn_input = ttnn.to_device(ttnn_input, device=device)
-    ttnn_input = ttnn.to_layout(ttnn_input, layout=ttnn.TILE_LAYOUT, memory_config=ttnn.L1_MEMORY_CONFIG)
-    torch_output = torch_module(torch_input)
-    parameters = create_yolov11_model_parameters(torch_module, torch_input, device=device)
-    ttnn_module = ttnn_bottleneck(device=device, parameter=parameters.conv_args, conv_pt=parameters)
-    ttnn_output = ttnn_module(x=ttnn_input, device=device)
-    ttnn_output = ttnn.to_torch(ttnn_output)
-    ttnn_output = ttnn_output.permute(0, 3, 1, 2)
-    ttnn_output = ttnn_output.reshape(torch_output.shape)
-    assert_with_pcc(torch_output, ttnn_output, 0.99999)
diff --git a/tests/ttnn/integration_tests/yolov11/test_ttnn_c2psa.py b/tests/ttnn/integration_tests/yolov11/test_ttnn_c2psa.py
deleted file mode 100644
index 25b6bbe79f5..00000000000
--- a/tests/ttnn/integration_tests/yolov11/test_ttnn_c2psa.py
+++ /dev/null
@@ -1,62 +0,0 @@
-# SPDX-FileCopyrightText: © 2025 Tenstorrent Inc.
-# SPDX-License-Identifier: Apache-2.0
-import pytest
-import ttnn
-import torch
-from tests.ttnn.utils_for_testing import assert_with_pcc
-from models.experimental.functional_yolov11.tt.model_preprocessing import (
-    create_yolov11_input_tensors,
-    create_yolov11_model_parameters,
-)
-from models.experimental.functional_yolov11.reference.yolov11 import C2PSA as torch_c2psa_block
-from models.experimental.functional_yolov11.tt.ttnn_yolov11 import C2PSA as ttnn_c2psa_block
-
-
-@pytest.mark.parametrize(
-    "in_channel, out_channel, kernel, stride, padding, dilation, groups,fwd_input_shape",
-    [
-        (
-            [256, 256, 128, 128, 128, 128, 256],
-            [256, 256, 256, 128, 128, 256, 128],
-            [1, 1, 1, 1, 3, 1, 1],
-            [1, 1, 1, 1, 1, 1, 1],
-            [0, 0, 0, 0, 1, 0, 0],
-            [1, 1, 1, 1, 1, 1, 1],
-            [1, 1, 1, 1, 128, 1, 1],
-            [1, 256, 7, 7],
-        ),
-    ],
-)
-@pytest.mark.parametrize("device_params", [{"l1_small_size": 79104}], indirect=True)
-def test_yolo_v11_c2psa_block(
-    device,
-    use_program_cache,
-    reset_seeds,
-    in_channel,
-    out_channel,
-    kernel,
-    stride,
-    padding,
-    dilation,
-    groups,
-    fwd_input_shape,
-):
-    torch_module = torch_c2psa_block(in_channel, out_channel, kernel, stride, padding, dilation, groups)
-    torch_module.eval()
-    torch_input, ttnn_input = create_yolov11_input_tensors(
-        device,
-        batch=fwd_input_shape[0],
-        input_channels=fwd_input_shape[1],
-        input_height=fwd_input_shape[2],
-        input_width=fwd_input_shape[3],
-    )
-    ttnn_input = ttnn.to_device(ttnn_input, device=device)
-    ttnn_input = ttnn.to_layout(ttnn_input, layout=ttnn.TILE_LAYOUT, memory_config=ttnn.L1_MEMORY_CONFIG)
-    torch_output = torch_module(torch_input)
-    parameters = create_yolov11_model_parameters(torch_module, torch_input, device=device)
-    ttnn_module = ttnn_c2psa_block(device=device, parameter=parameters.conv_args, conv_pt=parameters)
-    ttnn_output = ttnn_module(x=ttnn_input, device=device)
-    ttnn_output = ttnn.to_torch(ttnn_output)
-    ttnn_output = ttnn_output.permute(0, 3, 1, 2)
-    ttnn_output = ttnn_output.reshape(torch_output.shape)
-    assert_with_pcc(torch_output, ttnn_output, 0.99999)
diff --git a/tests/ttnn/integration_tests/yolov11/test_ttnn_c3k.py b/tests/ttnn/integration_tests/yolov11/test_ttnn_c3k.py
deleted file mode 100644
index 7db507db01f..00000000000
--- a/tests/ttnn/integration_tests/yolov11/test_ttnn_c3k.py
+++ /dev/null
@@ -1,73 +0,0 @@
-# SPDX-FileCopyrightText: © 2025 Tenstorrent Inc.
-# SPDX-License-Identifier: Apache-2.0
-
-import pytest
-import ttnn
-import torch
-from tests.ttnn.utils_for_testing import assert_with_pcc
-from models.experimental.functional_yolov11.tt.model_preprocessing import (
-    create_yolov11_input_tensors,
-    create_yolov11_model_parameters,
-)
-from models.experimental.functional_yolov11.reference.yolov11 import C3k as torch_c3k
-from models.experimental.functional_yolov11.tt.ttnn_yolov11 import C3K as ttnn_c3k
-
-
-@pytest.mark.parametrize(
-    "in_channel, out_channel, kernel, stride, padding, dilation, groups,fwd_input_shape",
-    [
-        (
-            [64, 64, 64, 32, 32, 32, 32],
-            [32, 32, 64, 32, 32, 32, 32],
-            [1, 1, 1, 3, 3, 3, 3],
-            [1, 1, 1, 1, 1, 1, 1],
-            [0, 0, 0, 1, 1, 1, 1],
-            [1, 1, 1, 1, 1, 1, 1],
-            [1, 1, 1, 1, 1, 1, 1],
-            [1, 64, 14, 14],
-        ),
-        # (
-        #     [128, 128, 128, 64, 64, 64, 64],
-        #     [64, 64, 128, 64, 64, 64, 64],
-        #     [1, 1, 1, 3, 3, 3, 3],
-        #     [1, 1, 1, 1, 1, 1, 1],
-        #     [0, 0, 0, 1, 1, 1, 1],
-        #     [1, 1, 1, 1, 1, 1, 1],
-        #     [1, 1, 1, 1, 1, 1, 1],
-        #     [1, 128, 7, 7],
-        # ),
-    ],
-)
-@pytest.mark.parametrize("device_params", [{"l1_small_size": 79104}], indirect=True)
-def test_yolo_v11_c3k(
-    device,
-    use_program_cache,
-    reset_seeds,
-    in_channel,
-    out_channel,
-    kernel,
-    stride,
-    padding,
-    dilation,
-    groups,
-    fwd_input_shape,
-):
-    torch_module = torch_c3k(in_channel, out_channel, kernel, stride, padding, dilation, groups)
-    torch_module.eval()
-    torch_input, ttnn_input = create_yolov11_input_tensors(
-        device,
-        batch=fwd_input_shape[0],
-        input_channels=fwd_input_shape[1],
-        input_height=fwd_input_shape[2],
-        input_width=fwd_input_shape[3],
-    )
-    ttnn_input = ttnn.to_device(ttnn_input, device=device)
-    ttnn_input = ttnn.to_layout(ttnn_input, layout=ttnn.TILE_LAYOUT, memory_config=ttnn.L1_MEMORY_CONFIG)
-    torch_output = torch_module(torch_input)
-    parameters = create_yolov11_model_parameters(torch_module, torch_input, device=device)
-    ttnn_module = ttnn_c3k(device=device, parameter=parameters.conv_args, conv_pt=parameters)
-    ttnn_output = ttnn_module(x=ttnn_input, device=device)  # ttnn.Shape([1, 1, 224, 64])
-    ttnn_output = ttnn.to_torch(ttnn_output)
-    ttnn_output = ttnn_output.permute(0, 3, 1, 2)
-    ttnn_output = ttnn_output.reshape(torch_output.shape)
-    assert_with_pcc(torch_output, ttnn_output, 0.99999)
diff --git a/tests/ttnn/integration_tests/yolov11/test_ttnn_c3k2.py b/tests/ttnn/integration_tests/yolov11/test_ttnn_c3k2.py
deleted file mode 100644
index 46523ba7217..00000000000
--- a/tests/ttnn/integration_tests/yolov11/test_ttnn_c3k2.py
+++ /dev/null
@@ -1,144 +0,0 @@
-# SPDX-FileCopyrightText: © 2025 Tenstorrent Inc.
-# SPDX-License-Identifier: Apache-2.0
-
-import pytest
-import ttnn
-import torch
-from tests.ttnn.utils_for_testing import assert_with_pcc
-from models.experimental.functional_yolov11.tt.model_preprocessing import (
-    create_yolov11_input_tensors,
-    create_yolov11_model_parameters,
-)
-from models.experimental.functional_yolov11.reference.yolov11 import C3k2 as torch_c3k2
-from models.experimental.functional_yolov11.tt.ttnn_yolov11 import C3k2 as ttnn_c3k2
-
-
-@pytest.mark.parametrize(
-    "in_channel, out_channel, kernel, stride, padding, dilation, groups,is_bk_enabled,fwd_input_shape",
-    [
-        # (
-        #     [32, 48, 16, 8],
-        #     [32, 64, 8, 16],
-        #     [1, 1, 3, 3],
-        #     [1, 1, 1, 1],
-        #     [0, 0, 1, 1],
-        #     [1, 1, 1, 1],
-        #     [1, 1, 1, 1],
-        #     True,
-        #     [1, 32, 56, 56],
-        # ),
-        # (
-        #     [64, 96, 32, 16],
-        #     [64, 128, 16, 32],
-        #     [1, 1, 3, 3],
-        #     [1, 1, 1, 1],
-        #     [0, 0, 1, 1],
-        #     [1, 1, 1, 1],
-        #     [1, 1, 1, 1],
-        #     True,
-        #     [1, 64, 28, 28],
-        # ),
-        (
-            [128, 192, 64, 64, 64, 32, 32, 32, 32],
-            [128, 128, 32, 32, 64, 32, 32, 32, 32],
-            [1, 1, 1, 1, 1, 3, 3, 3, 3],
-            [1, 1, 1, 1, 1, 1, 1, 1, 1],
-            [0, 0, 0, 0, 0, 1, 1, 1, 1],
-            [1, 1, 1, 1, 1, 1, 1, 1, 1],
-            [1, 1, 1, 1, 1, 1, 1, 1, 1],
-            False,
-            [1, 128, 14, 14],
-        ),
-        # (
-        #     [256, 384, 128, 128, 128, 64, 64, 64, 64],
-        #     [256, 256, 64, 64, 128, 64, 64, 64, 64],
-        #     [1, 1, 1, 1, 1, 3, 3, 3, 3],
-        #     [1, 1, 1, 1, 1, 1, 1, 1, 1],
-        #     [0, 0, 0, 0, 0, 1, 1, 1, 1],
-        #     [1, 1, 1, 1, 1, 1, 1, 1, 1],
-        #     [1, 1, 1, 1, 1, 1, 1, 1, 1],
-        #     False,
-        #     [1, 256, 7, 7],
-        # ),
-        # (
-        #     [384, 192, 64, 32],
-        #     [128, 128, 32, 64],
-        #     [1, 1, 3, 3],
-        #     [1, 1, 1, 1],
-        #     [0, 0, 1, 1],
-        #     [1, 1, 1, 1],
-        #     [1, 1, 1, 1],
-        #     True,
-        #     [1, 384, 14, 14],
-        # ),
-        # (
-        #     [256, 96, 32, 16],
-        #     [64, 64, 16, 32],
-        #     [1, 1, 3, 3],
-        #     [1, 1, 1, 1],
-        #     [0, 0, 1, 1],
-        #     [1, 1, 1, 1],
-        #     [1, 1, 1, 1],
-        #     True,
-        #     [1, 256, 28, 28],
-        # ),
-        # (
-        #     [192, 192, 64, 32],
-        #     [128, 128, 32, 64],
-        #     [1, 1, 3, 3],
-        #     [1, 1, 1, 1],
-        #     [0, 0, 1, 1],
-        #     [1, 1, 1, 1],
-        #     [1, 1, 1, 1],
-        #     True,
-        #     [1, 192, 14, 14],
-        # ),
-        # (
-        #     [384, 384, 128, 128, 128, 64, 64, 64, 64],
-        #     [256, 256, 64, 64, 128, 64, 64, 64, 64],
-        #     [1, 1, 1, 1, 1, 3, 3, 3, 3],
-        #     [1, 1, 1, 1, 1, 1, 1, 1, 1],
-        #     [0, 0, 0, 0, 0, 1, 1, 1, 1],
-        #     [1, 1, 1, 1, 1, 1, 1, 1, 1],
-        #     [1, 1, 1, 1, 1, 1, 1, 1, 1],
-        #     False,
-        #     [1, 384, 7, 7],
-        # ),
-    ],
-)
-@pytest.mark.parametrize("device_params", [{"l1_small_size": 79104}], indirect=True)
-def test_yolo_v11_c3k2(
-    device,
-    use_program_cache,
-    reset_seeds,
-    in_channel,
-    out_channel,
-    kernel,
-    stride,
-    padding,
-    dilation,
-    groups,
-    is_bk_enabled,
-    fwd_input_shape,
-):
-    torch_module = torch_c3k2(in_channel, out_channel, kernel, stride, padding, dilation, groups, is_bk_enabled)
-    torch_module.eval()
-    torch_input, ttnn_input = create_yolov11_input_tensors(
-        device,
-        batch=fwd_input_shape[0],
-        input_channels=fwd_input_shape[1],
-        input_height=fwd_input_shape[2],
-        input_width=fwd_input_shape[3],
-    )
-    ttnn_input = ttnn.to_device(ttnn_input, device=device, memory_config=ttnn.L1_MEMORY_CONFIG)
-    # ttnn_input = ttnn.to_layout(ttnn_input, layout=ttnn.TILE_LAYOUT, )
-    torch_output = torch_module(torch_input)
-    parameters = create_yolov11_model_parameters(torch_module, torch_input, device=device)
-    ttnn_module = ttnn_c3k2(
-        device=device, parameter=parameters.conv_args, conv_pt=parameters, is_bk_enabled=is_bk_enabled
-    )
-    ttnn_output = ttnn_module(x=ttnn_input, device=device)
-    ttnn_output = ttnn.to_torch(ttnn_output)
-    ttnn_output = ttnn_output.permute(0, 3, 1, 2)
-    ttnn_output = ttnn_output.reshape(torch_output.shape)
-    assert_with_pcc(torch_output, ttnn_output, 0.99999)
diff --git a/tests/ttnn/integration_tests/yolov11/test_ttnn_detect.py b/tests/ttnn/integration_tests/yolov11/test_ttnn_detect.py
deleted file mode 100644
index 2108bf23eb2..00000000000
--- a/tests/ttnn/integration_tests/yolov11/test_ttnn_detect.py
+++ /dev/null
@@ -1,86 +0,0 @@
-# SPDX-FileCopyrightText: © 2024 Tenstorrent Inc.
-# SPDX-License-Identifier: Apache-2.0
-import pytest
-import ttnn
-import torch
-from tests.ttnn.utils_for_testing import assert_with_pcc
-from models.experimental.functional_yolov11.tt.model_preprocessing import (
-    create_yolov11_input_tensors,
-    create_yolov11_model_parameters_detect,
-    create_yolov11_model_parameters,
-)
-from models.experimental.functional_yolov11.reference.yolov11 import Detect as torch_detect
-from models.experimental.functional_yolov11.tt.ttnn_yolov11 import Detect as ttnn_detect
-from ttnn.model_preprocessing import preprocess_model_parameters
-import math
-
-
-@pytest.mark.parametrize(
-    "in_channel, out_channel, kernel, stride, padding, dilation, groups,fwd_input_shape",
-    [
-        (
-            [64, 64, 64, 128, 64, 64, 256, 64, 64, 64, 64, 80, 80, 80, 128, 128, 80, 80, 80, 256, 256, 80, 80, 80, 16],
-            [64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 80, 80, 80, 80, 128, 80, 80, 80, 80, 256, 80, 80, 80, 80, 1],
-            [3, 3, 1, 3, 3, 1, 3, 3, 1, 3, 1, 3, 1, 1, 3, 1, 3, 1, 1, 3, 1, 3, 1, 1, 1],
-            [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
-            [1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 0, 1, 0, 0, 1, 0, 1, 0, 0, 1, 0, 1, 0, 0, 0],
-            [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
-            [1, 1, 1, 1, 1, 1, 1, 1, 1, 64, 1, 80, 1, 1, 128, 1, 80, 1, 1, 256, 1, 80, 1, 1, 1],
-            [[1, 64, 28, 28], [1, 128, 14, 14], [1, 256, 7, 7]],
-        ),
-    ],
-)
-@pytest.mark.parametrize("device_params", [{"l1_small_size": 79104}], indirect=True)
-def test_yolo_v11_detect(
-    device,
-    use_program_cache,
-    reset_seeds,
-    in_channel,
-    out_channel,
-    kernel,
-    stride,
-    padding,
-    dilation,
-    groups,
-    fwd_input_shape,
-):
-    torch_module = torch_detect(in_channel, out_channel, kernel, stride, padding, dilation, groups)
-    torch_module.eval()
-    torch_input_1, ttnn_input_1 = create_yolov11_input_tensors(
-        device,
-        batch=fwd_input_shape[0][0],
-        input_channels=fwd_input_shape[0][1],
-        input_height=fwd_input_shape[0][2],
-        input_width=fwd_input_shape[0][3],
-    )
-    torch_input_2, ttnn_input_2 = create_yolov11_input_tensors(
-        device,
-        batch=fwd_input_shape[1][0],
-        input_channels=fwd_input_shape[1][1],
-        input_height=fwd_input_shape[1][2],
-        input_width=fwd_input_shape[1][3],
-    )
-    torch_input_3, ttnn_input_3 = create_yolov11_input_tensors(
-        device,
-        batch=fwd_input_shape[2][0],
-        input_channels=fwd_input_shape[2][1],
-        input_height=fwd_input_shape[2][2],
-        input_width=fwd_input_shape[2][3],
-    )
-    ttnn_input_1 = ttnn.to_device(ttnn_input_1, device=device)
-    ttnn_input_1 = ttnn.to_layout(ttnn_input_1, layout=ttnn.TILE_LAYOUT, memory_config=ttnn.L1_MEMORY_CONFIG)
-    ttnn_input_2 = ttnn.to_device(ttnn_input_2, device=device)
-    ttnn_input_2 = ttnn.to_layout(ttnn_input_2, layout=ttnn.TILE_LAYOUT, memory_config=ttnn.L1_MEMORY_CONFIG)
-    ttnn_input_3 = ttnn.to_device(ttnn_input_3, device=device)
-    ttnn_input_3 = ttnn.to_layout(ttnn_input_3, layout=ttnn.TILE_LAYOUT, memory_config=ttnn.L1_MEMORY_CONFIG)
-    torch_output = torch_module(torch_input_1, torch_input_2, torch_input_3)
-    parameters = create_yolov11_model_parameters_detect(
-        torch_module, torch_input_1, torch_input_2, torch_input_3, device=device
-    )
-    ttnn_module = ttnn_detect(device=device, parameter=parameters.model, conv_pt=parameters)
-
-    ttnn_output = ttnn_module(y1=ttnn_input_1, y2=ttnn_input_2, y3=ttnn_input_3, device=device)
-    ttnn_output = ttnn.to_torch(ttnn_output)
-    # ttnn_output = ttnn_output.permute(0, 2, 1)
-    ttnn_output = ttnn_output.reshape(torch_output.shape)
-    assert_with_pcc(torch_output, ttnn_output, 0.99999)
diff --git a/tests/ttnn/integration_tests/yolov11/test_ttnn_psa_block.py b/tests/ttnn/integration_tests/yolov11/test_ttnn_psa_block.py
deleted file mode 100644
index c2643318abf..00000000000
--- a/tests/ttnn/integration_tests/yolov11/test_ttnn_psa_block.py
+++ /dev/null
@@ -1,62 +0,0 @@
-# SPDX-FileCopyrightText: © 2025 Tenstorrent Inc.
-# SPDX-License-Identifier: Apache-2.0
-import pytest
-import ttnn
-import torch
-from tests.ttnn.utils_for_testing import assert_with_pcc
-from models.experimental.functional_yolov11.tt.model_preprocessing import (
-    create_yolov11_input_tensors,
-    create_yolov11_model_parameters,
-)
-from models.experimental.functional_yolov11.reference.yolov11 import PSABlock as torch_psa_block
-from models.experimental.functional_yolov11.tt.ttnn_yolov11 import PSABlock as ttnn_psa_block
-
-
-@pytest.mark.parametrize(
-    "in_channel, out_channel, kernel, stride, padding, dilation, groups,fwd_input_shape",
-    [
-        (
-            [128, 128, 128, 128, 256],
-            [256, 128, 128, 256, 128],
-            [1, 1, 3, 1, 1],
-            [1, 1, 1, 1, 1],
-            [0, 0, 1, 0, 0],
-            [1, 1, 1, 1, 1],
-            [1, 1, 128, 1, 1],
-            [1, 128, 7, 7],
-        ),
-    ],
-)
-@pytest.mark.parametrize("device_params", [{"l1_small_size": 79104}], indirect=True)
-def test_yolo_v11_psa_block(
-    device,
-    use_program_cache,
-    reset_seeds,
-    in_channel,
-    out_channel,
-    kernel,
-    stride,
-    padding,
-    dilation,
-    groups,
-    fwd_input_shape,
-):
-    torch_module = torch_psa_block(in_channel, out_channel, kernel, stride, padding, dilation, groups)
-    torch_module.eval()
-    torch_input, ttnn_input = create_yolov11_input_tensors(
-        device,
-        batch=fwd_input_shape[0],
-        input_channels=fwd_input_shape[1],
-        input_height=fwd_input_shape[2],
-        input_width=fwd_input_shape[3],
-    )
-    ttnn_input = ttnn.to_device(ttnn_input, device=device)
-    ttnn_input = ttnn.to_layout(ttnn_input, layout=ttnn.TILE_LAYOUT, memory_config=ttnn.L1_MEMORY_CONFIG)
-    torch_output = torch_module(torch_input)
-    parameters = create_yolov11_model_parameters(torch_module, torch_input, device=device)
-    ttnn_module = ttnn_psa_block(device=device, parameter=parameters.conv_args, conv_pt=parameters)
-    ttnn_output = ttnn_module(x=ttnn_input, device=device)
-    ttnn_output = ttnn.to_torch(ttnn_output)
-    ttnn_output = ttnn_output.permute(0, 3, 1, 2)
-    ttnn_output = ttnn_output.reshape(torch_output.shape)
-    assert_with_pcc(torch_output, ttnn_output, 0.99999)
diff --git a/tests/ttnn/integration_tests/yolov11/test_ttnn_sppf.py b/tests/ttnn/integration_tests/yolov11/test_ttnn_sppf.py
deleted file mode 100644
index d215363fcc5..00000000000
--- a/tests/ttnn/integration_tests/yolov11/test_ttnn_sppf.py
+++ /dev/null
@@ -1,54 +0,0 @@
-# SPDX-FileCopyrightText: © 2025 Tenstorrent Inc.
-# SPDX-License-Identifier: Apache-2.0
-
-import pytest
-import ttnn
-import torch
-from tests.ttnn.utils_for_testing import assert_with_pcc
-from models.experimental.functional_yolov11.tt.model_preprocessing import (
-    create_yolov11_input_tensors,
-    create_yolov11_model_parameters,
-)
-from models.experimental.functional_yolov11.reference.yolov11 import SPPF as torch_sppf
-from models.experimental.functional_yolov11.tt.ttnn_yolov11 import SPPF as ttnn_sppf
-
-
-@pytest.mark.parametrize(
-    "in_channel, out_channel, kernel, stride, padding, dilation, groups,fwd_input_shape",
-    [
-        ([256, 512], [128, 256], [1, 1], [1, 1], [0, 0], [1, 1], [1, 1], [1, 256, 7, 7]),
-    ],
-)
-@pytest.mark.parametrize("device_params", [{"l1_small_size": 79104}], indirect=True)
-def test_yolo_v11_sppf(
-    device,
-    use_program_cache,
-    reset_seeds,
-    in_channel,
-    out_channel,
-    kernel,
-    stride,
-    padding,
-    dilation,
-    groups,
-    fwd_input_shape,
-):
-    torch_module = torch_sppf(in_channel, out_channel, kernel, stride, padding, dilation, groups)
-    torch_module.eval()
-    torch_input, ttnn_input = create_yolov11_input_tensors(
-        device,
-        batch=fwd_input_shape[0],
-        input_channels=fwd_input_shape[1],
-        input_height=fwd_input_shape[2],
-        input_width=fwd_input_shape[3],
-    )
-    ttnn_input = ttnn.to_device(ttnn_input, device=device)
-    ttnn_input = ttnn.to_layout(ttnn_input, layout=ttnn.TILE_LAYOUT, memory_config=ttnn.L1_MEMORY_CONFIG)
-    torch_output = torch_module(torch_input)
-    parameters = create_yolov11_model_parameters(torch_module, torch_input, device=device)
-    ttnn_module = ttnn_sppf(device=device, parameter=parameters.conv_args, conv_pt=parameters)
-    ttnn_output = ttnn_module(x=ttnn_input, device=device)
-    ttnn_output = ttnn.to_torch(ttnn_output)
-    ttnn_output = ttnn_output.permute(0, 3, 1, 2)
-    ttnn_output = ttnn_output.reshape(torch_output.shape)
-    assert_with_pcc(torch_output, ttnn_output, 0.99999)
diff --git a/tests/ttnn/integration_tests/yolov11/test_ttnn_yolo_v11.py b/tests/ttnn/integration_tests/yolov11/test_ttnn_yolo_v11.py
new file mode 100644
index 00000000000..b3ee0120290
--- /dev/null
+++ b/tests/ttnn/integration_tests/yolov11/test_ttnn_yolo_v11.py
@@ -0,0 +1,776 @@
+# SPDX-FileCopyrightText: © 2025 Tenstorrent Inc.
+
+# SPDX-License-Identifier: Apache-2.0
+
+from pathlib import Path
+import os
+import pytest
+import sys
+import ttnn
+import torch.nn as nn
+import torch
+from tests.ttnn.utils_for_testing import assert_with_pcc
+from models.experimental.functional_yolov11.tt.model_preprocessing import (
+    create_yolov11_input_tensors,
+    create_yolov11_model_parameters,
+    create_yolov11_model_parameters_detect,
+)
+from models.experimental.functional_yolov11.reference.yolov11 import (
+    Attention as torch_attention,
+    Bottleneck as torch_bottleneck,
+    C2PSA as torch_c2psa_block,
+    C3k as torch_c3k,
+    C3k2 as torch_c3k2,
+    PSABlock as torch_psa_block,
+    SPPF as torch_sppf,
+    Detect as torch_detect,
+)
+from models.experimental.functional_yolov11.tt.ttnn_yolov11 import (
+    Attention as ttnn_attention,
+    Bottleneck as ttnn_bottleneck,
+    C2PSA as ttnn_c2psa_block,
+    C3K as ttnn_c3k,
+    C3k2 as ttnn_c3k2,
+    PSABlock as ttnn_psa_block,
+    SPPF as ttnn_sppf,
+    Detect as ttnn_detect,
+)
+from models.utility_functions import skip_for_grayskull
+
+from models.experimental.functional_yolov11.reference import yolov11
+from models.experimental.functional_yolov11.tt import ttnn_yolov11
+
+
+@pytest.mark.parametrize(
+    "in_channel, out_channel, kernel, stride, padding, dilation, groups,fwd_input_shape",
+    [  # single case: seven per-layer config lists followed by the input shape
+        ([128, 128, 128], [256, 128, 128], [1, 1, 3], [1, 1, 1], [0, 0, 1], [1, 1, 1], [1, 1, 128], [1, 128, 7, 7]),
+    ],
+)
+@skip_for_grayskull()
+@pytest.mark.parametrize("device_params", [{"l1_small_size": 79104}], indirect=True)  # indirect: fed to a fixture
+def test_yolo_v11_attention(  # checks the TTNN Attention block against the torch reference
+    device,
+    use_program_cache,
+    reset_seeds,
+    in_channel,  # the seven config lists are passed positionally to the torch module
+    out_channel,
+    kernel,
+    stride,
+    padding,
+    dilation,
+    groups,
+    fwd_input_shape,  # [batch, channels, height, width]
+):
+    torch_module = torch_attention(in_channel, out_channel, kernel, stride, padding, dilation, groups)
+    torch_module.eval()  # presumably puts the reference module in inference mode
+    torch_input, ttnn_input = create_yolov11_input_tensors(  # returns a torch input and a ttnn input
+        device,
+        batch=fwd_input_shape[0],
+        input_channels=fwd_input_shape[1],
+        input_height=fwd_input_shape[2],
+        input_width=fwd_input_shape[3],
+    )
+    ttnn_input = ttnn.to_device(ttnn_input, device=device)  # then converted to tile layout in L1
+    ttnn_input = ttnn.to_layout(ttnn_input, layout=ttnn.TILE_LAYOUT, memory_config=ttnn.L1_MEMORY_CONFIG)
+    torch_output = torch_module(torch_input)  # reference result
+    parameters = create_yolov11_model_parameters(torch_module, torch_input, device=device)
+    ttnn_module = ttnn_attention(device=device, parameter=parameters.conv_args, conv_pt=parameters)
+    ttnn_output = ttnn_module(x=ttnn_input, device=device)  # result under test
+    ttnn_output = ttnn.to_torch(ttnn_output)  # back to a torch tensor for comparison
+    ttnn_output = ttnn_output.permute(0, 3, 1, 2)  # presumably NHWC -> NCHW; TODO confirm
+    ttnn_output = ttnn_output.reshape(torch_output.shape)  # align with the reference shape
+    assert_with_pcc(torch_output, ttnn_output, 0.99)  # 0.99 is presumably the minimum PCC
+
+
+@skip_for_grayskull()
+@pytest.mark.parametrize(
+    "in_channel, out_channel, kernel, stride, padding, dilation, groups,fwd_input_shape",
+    [  # each case: seven two-entry config lists followed by the input shape
+        ([16, 8], [8, 16], [3, 3], [1, 1], [1, 1], [1, 1], [1, 1], [1, 16, 56, 56]),  # 1
+        ([32, 16], [16, 32], [3, 3], [1, 1], [1, 1], [1, 1], [1, 1], [1, 32, 38, 28]),  # 2 TODO: confirm H=38 (28?)
+        ([32, 32], [32, 32], [3, 3], [1, 1], [1, 1], [1, 1], [1, 1], [1, 32, 14, 14]),  # 3
+        ([64, 64], [64, 64], [3, 3], [1, 1], [1, 1], [1, 1], [1, 1], [1, 64, 7, 7]),
+        ([64, 32], [32, 64], [3, 3], [1, 1], [1, 1], [1, 1], [1, 1], [1, 64, 14, 14]),
+        ([32, 16], [16, 32], [3, 3], [1, 1], [1, 1], [1, 1], [1, 1], [1, 32, 28, 28]),
+        ([64, 32], [32, 64], [3, 3], [1, 1], [1, 1], [1, 1], [1, 1], [1, 64, 14, 14]),  # same as 2 rows up
+        ([64, 64], [64, 64], [3, 3], [1, 1], [1, 1], [1, 1], [1, 1], [1, 64, 7, 7]),  # same as 4 rows up
+    ],
+)
+@pytest.mark.parametrize("device_params", [{"l1_small_size": 79104}], indirect=True)  # indirect: fed to a fixture
+def test_yolo_v11_bottleneck(  # checks the TTNN Bottleneck block against the torch reference
+    device,
+    use_program_cache,
+    reset_seeds,
+    in_channel,  # the seven config lists are passed positionally to the torch module
+    out_channel,
+    kernel,
+    stride,
+    padding,
+    dilation,
+    groups,
+    fwd_input_shape,  # [batch, channels, height, width]
+):
+    torch_module = torch_bottleneck(in_channel, out_channel, kernel, stride, padding, dilation, groups)
+    torch_module.eval()  # presumably puts the reference module in inference mode
+    torch_input, ttnn_input = create_yolov11_input_tensors(  # returns a torch input and a ttnn input
+        device,
+        batch=fwd_input_shape[0],
+        input_channels=fwd_input_shape[1],
+        input_height=fwd_input_shape[2],
+        input_width=fwd_input_shape[3],
+    )
+    ttnn_input = ttnn.to_device(ttnn_input, device=device)  # then converted to tile layout in L1
+    ttnn_input = ttnn.to_layout(ttnn_input, layout=ttnn.TILE_LAYOUT, memory_config=ttnn.L1_MEMORY_CONFIG)
+    torch_output = torch_module(torch_input)  # reference result
+    parameters = create_yolov11_model_parameters(torch_module, torch_input, device=device)
+    ttnn_module = ttnn_bottleneck(device=device, parameter=parameters.conv_args, conv_pt=parameters)
+    ttnn_output = ttnn_module(x=ttnn_input, device=device)  # result under test
+    ttnn_output = ttnn.to_torch(ttnn_output)  # back to a torch tensor for comparison
+    ttnn_output = ttnn_output.permute(0, 3, 1, 2)  # presumably NHWC -> NCHW; TODO confirm
+    ttnn_output = ttnn_output.reshape(torch_output.shape)  # align with the reference shape
+    assert_with_pcc(torch_output, ttnn_output, 0.99)  # 0.99 is presumably the minimum PCC
+
+
+@skip_for_grayskull()
+@pytest.mark.parametrize(
+    "in_channel, out_channel, kernel, stride, padding, dilation, groups,fwd_input_shape",
+    [  # single case: seven seven-entry config lists followed by the input shape
+        (
+            [256, 256, 128, 128, 128, 128, 256],  # in_channel
+            [256, 256, 256, 128, 128, 256, 128],  # out_channel
+            [1, 1, 1, 1, 3, 1, 1],  # kernel
+            [1, 1, 1, 1, 1, 1, 1],  # stride
+            [0, 0, 0, 0, 1, 0, 0],  # padding
+            [1, 1, 1, 1, 1, 1, 1],  # dilation
+            [1, 1, 1, 1, 128, 1, 1],  # groups
+            [1, 256, 7, 7],  # fwd_input_shape: [batch, channels, height, width]
+        ),
+    ],
+)
+@pytest.mark.parametrize("device_params", [{"l1_small_size": 79104}], indirect=True)  # indirect: fed to a fixture
+def test_yolo_v11_c2psa_block(  # checks the TTNN C2PSA block against the torch reference
+    device,
+    use_program_cache,
+    reset_seeds,
+    in_channel,  # the seven config lists are passed positionally to the torch module
+    out_channel,
+    kernel,
+    stride,
+    padding,
+    dilation,
+    groups,
+    fwd_input_shape,  # [batch, channels, height, width]
+):
+    torch_module = torch_c2psa_block(in_channel, out_channel, kernel, stride, padding, dilation, groups)
+    torch_module.eval()  # presumably puts the reference module in inference mode
+    torch_input, ttnn_input = create_yolov11_input_tensors(  # returns a torch input and a ttnn input
+        device,
+        batch=fwd_input_shape[0],
+        input_channels=fwd_input_shape[1],
+        input_height=fwd_input_shape[2],
+        input_width=fwd_input_shape[3],
+    )
+    ttnn_input = ttnn.to_device(ttnn_input, device=device)  # then converted to tile layout in L1
+    ttnn_input = ttnn.to_layout(ttnn_input, layout=ttnn.TILE_LAYOUT, memory_config=ttnn.L1_MEMORY_CONFIG)
+    torch_output = torch_module(torch_input)  # reference result
+    parameters = create_yolov11_model_parameters(torch_module, torch_input, device=device)
+    ttnn_module = ttnn_c2psa_block(device=device, parameter=parameters.conv_args, conv_pt=parameters)
+    ttnn_output = ttnn_module(x=ttnn_input, device=device)  # result under test
+    ttnn_output = ttnn.to_torch(ttnn_output)  # back to a torch tensor for comparison
+    ttnn_output = ttnn_output.permute(0, 3, 1, 2)  # presumably NHWC -> NCHW; TODO confirm
+    ttnn_output = ttnn_output.reshape(torch_output.shape)  # align with the reference shape
+    assert_with_pcc(torch_output, ttnn_output, 0.99)  # 0.99 is presumably the minimum PCC
+
+
+@skip_for_grayskull()
+@pytest.mark.parametrize(
+    "in_channel, out_channel, kernel, stride, padding, dilation, groups,fwd_input_shape",
+    [  # two channel configurations, each run at two spatial sizes
+        (
+            [64, 64, 64, 32, 32, 32, 32],  # in_channel
+            [32, 32, 64, 32, 32, 32, 32],  # out_channel
+            [1, 1, 1, 3, 3, 3, 3],  # kernel
+            [1, 1, 1, 1, 1, 1, 1],  # stride
+            [0, 0, 0, 1, 1, 1, 1],  # padding
+            [1, 1, 1, 1, 1, 1, 1],  # dilation
+            [1, 1, 1, 1, 1, 1, 1],  # groups
+            [1, 64, 14, 14],  # fwd_input_shape: [batch, channels, height, width]
+        ),
+        (
+            [128, 128, 128, 64, 64, 64, 64],
+            [64, 64, 128, 64, 64, 64, 64],
+            [1, 1, 1, 3, 3, 3, 3],
+            [1, 1, 1, 1, 1, 1, 1],
+            [0, 0, 0, 1, 1, 1, 1],
+            [1, 1, 1, 1, 1, 1, 1],
+            [1, 1, 1, 1, 1, 1, 1],
+            [1, 128, 7, 7],
+        ),
+        (
+            [64, 64, 64, 32, 32, 32, 32],
+            [32, 32, 64, 32, 32, 32, 32],
+            [1, 1, 1, 3, 3, 3, 3],
+            [1, 1, 1, 1, 1, 1, 1],
+            [0, 0, 0, 1, 1, 1, 1],
+            [1, 1, 1, 1, 1, 1, 1],
+            [1, 1, 1, 1, 1, 1, 1],
+            [1, 64, 40, 40],  # same layer config as the first case, larger spatial size
+        ),
+        (
+            [128, 128, 128, 64, 64, 64, 64],
+            [64, 64, 128, 64, 64, 64, 64],
+            [1, 1, 1, 3, 3, 3, 3],
+            [1, 1, 1, 1, 1, 1, 1],
+            [0, 0, 0, 1, 1, 1, 1],
+            [1, 1, 1, 1, 1, 1, 1],
+            [1, 1, 1, 1, 1, 1, 1],
+            [1, 128, 20, 20],  # same layer config as the second case, larger spatial size
+        ),
+    ],
+)
+@pytest.mark.parametrize("device_params", [{"l1_small_size": 79104}], indirect=True)  # indirect: fed to a fixture
+def test_yolo_v11_c3k(  # checks the TTNN C3K block against the torch C3k reference
+    device,
+    use_program_cache,
+    reset_seeds,
+    in_channel,  # the seven config lists are passed positionally to the torch module
+    out_channel,
+    kernel,
+    stride,
+    padding,
+    dilation,
+    groups,
+    fwd_input_shape,  # [batch, channels, height, width]
+):
+    torch_module = torch_c3k(in_channel, out_channel, kernel, stride, padding, dilation, groups)
+    torch_module.eval()  # presumably puts the reference module in inference mode
+    torch_input, ttnn_input = create_yolov11_input_tensors(  # returns a torch input and a ttnn input
+        device,
+        batch=fwd_input_shape[0],
+        input_channels=fwd_input_shape[1],
+        input_height=fwd_input_shape[2],
+        input_width=fwd_input_shape[3],
+    )
+    ttnn_input = ttnn.to_device(ttnn_input, device=device)  # then converted to tile layout in L1
+    ttnn_input = ttnn.to_layout(ttnn_input, layout=ttnn.TILE_LAYOUT, memory_config=ttnn.L1_MEMORY_CONFIG)
+    torch_output = torch_module(torch_input)  # reference result
+    parameters = create_yolov11_model_parameters(torch_module, torch_input, device=device)
+    ttnn_module = ttnn_c3k(device=device, parameter=parameters.conv_args, conv_pt=parameters)
+    ttnn_output = ttnn_module(x=ttnn_input, device=device)  # result under test
+    ttnn_output = ttnn.to_torch(ttnn_output)  # back to a torch tensor for comparison
+    ttnn_output = ttnn_output.permute(0, 3, 1, 2)  # presumably NHWC -> NCHW; TODO confirm
+    ttnn_output = ttnn_output.reshape(torch_output.shape)  # align with the reference shape
+    assert_with_pcc(torch_output, ttnn_output, 0.99)  # 0.99 is presumably the minimum PCC
+
+
+@skip_for_grayskull()
+@pytest.mark.parametrize(
+    "in_channel, out_channel, kernel, stride, padding, dilation, groups,is_bk_enabled,fwd_input_shape",
+    [
+        # 224
+        (
+            [32, 48, 16, 8],
+            [32, 64, 8, 16],
+            [1, 1, 3, 3],
+            [1, 1, 1, 1],
+            [0, 0, 1, 1],
+            [1, 1, 1, 1],
+            [1, 1, 1, 1],
+            True,
+            [1, 32, 56, 56],
+        ),
+        (
+            [64, 96, 32, 16],
+            [64, 128, 16, 32],
+            [1, 1, 3, 3],
+            [1, 1, 1, 1],
+            [0, 0, 1, 1],
+            [1, 1, 1, 1],
+            [1, 1, 1, 1],
+            True,
+            [1, 64, 28, 28],
+        ),
+        (
+            [128, 192, 64, 64, 64, 32, 32, 32, 32],
+            [128, 128, 32, 32, 64, 32, 32, 32, 32],
+            [1, 1, 1, 1, 1, 3, 3, 3, 3],
+            [1, 1, 1, 1, 1, 1, 1, 1, 1],
+            [0, 0, 0, 0, 0, 1, 1, 1, 1],
+            [1, 1, 1, 1, 1, 1, 1, 1, 1],
+            [1, 1, 1, 1, 1, 1, 1, 1, 1],
+            False,
+            [1, 128, 14, 14],
+        ),
+        (
+            [256, 384, 128, 128, 128, 64, 64, 64, 64],
+            [256, 256, 64, 64, 128, 64, 64, 64, 64],
+            [1, 1, 1, 1, 1, 3, 3, 3, 3],
+            [1, 1, 1, 1, 1, 1, 1, 1, 1],
+            [0, 0, 0, 0, 0, 1, 1, 1, 1],
+            [1, 1, 1, 1, 1, 1, 1, 1, 1],
+            [1, 1, 1, 1, 1, 1, 1, 1, 1],
+            False,
+            [1, 256, 7, 7],
+        ),
+        (
+            [384, 192, 64, 32],
+            [128, 128, 32, 64],
+            [1, 1, 3, 3],
+            [1, 1, 1, 1],
+            [0, 0, 1, 1],
+            [1, 1, 1, 1],
+            [1, 1, 1, 1],
+            True,
+            [1, 384, 14, 14],
+        ),
+        (
+            [256, 96, 32, 16],
+            [64, 64, 16, 32],
+            [1, 1, 3, 3],
+            [1, 1, 1, 1],
+            [0, 0, 1, 1],
+            [1, 1, 1, 1],
+            [1, 1, 1, 1],
+            True,
+            [1, 256, 28, 28],
+        ),
+        (
+            [192, 192, 64, 32],
+            [128, 128, 32, 64],
+            [1, 1, 3, 3],
+            [1, 1, 1, 1],
+            [0, 0, 1, 1],
+            [1, 1, 1, 1],
+            [1, 1, 1, 1],
+            True,
+            [1, 192, 14, 14],
+        ),
+        (
+            [384, 384, 128, 128, 128, 64, 64, 64, 64],
+            [256, 256, 64, 64, 128, 64, 64, 64, 64],
+            [1, 1, 1, 1, 1, 3, 3, 3, 3],
+            [1, 1, 1, 1, 1, 1, 1, 1, 1],
+            [0, 0, 0, 0, 0, 1, 1, 1, 1],
+            [1, 1, 1, 1, 1, 1, 1, 1, 1],
+            [1, 1, 1, 1, 1, 1, 1, 1, 1],
+            False,
+            [1, 384, 7, 7],
+        ),
+        # 640
+        (
+            [32, 48, 16, 8],
+            [32, 64, 8, 16],
+            [1, 1, 3, 3],
+            [1, 1, 1, 1],
+            [0, 0, 1, 1],
+            [1, 1, 1, 1],
+            [1, 1, 1, 1],
+            True,
+            [1, 32, 160, 160],
+        ),
+        (
+            [64, 96, 32, 16],
+            [64, 128, 16, 32],
+            [1, 1, 3, 3],
+            [1, 1, 1, 1],
+            [0, 0, 1, 1],
+            [1, 1, 1, 1],
+            [1, 1, 1, 1],
+            True,
+            [1, 64, 80, 80],
+        ),
+        (
+            [128, 192, 64, 64, 64, 32, 32, 32, 32],
+            [128, 128, 32, 32, 64, 32, 32, 32, 32],
+            [1, 1, 1, 1, 1, 3, 3, 3, 3],
+            [1, 1, 1, 1, 1, 1, 1, 1, 1],
+            [0, 0, 0, 0, 0, 1, 1, 1, 1],
+            [1, 1, 1, 1, 1, 1, 1, 1, 1],
+            [1, 1, 1, 1, 1, 1, 1, 1, 1],
+            False,
+            [1, 128, 40, 40],
+        ),
+        (
+            [256, 384, 128, 128, 128, 64, 64, 64, 64],
+            [256, 256, 64, 64, 128, 64, 64, 64, 64],
+            [1, 1, 1, 1, 1, 3, 3, 3, 3],
+            [1, 1, 1, 1, 1, 1, 1, 1, 1],
+            [0, 0, 0, 0, 0, 1, 1, 1, 1],
+            [1, 1, 1, 1, 1, 1, 1, 1, 1],
+            [1, 1, 1, 1, 1, 1, 1, 1, 1],
+            False,
+            [1, 256, 20, 20],
+        ),
+        (
+            [384, 192, 64, 32],
+            [128, 128, 32, 64],
+            [1, 1, 3, 3],
+            [1, 1, 1, 1],
+            [0, 0, 1, 1],
+            [1, 1, 1, 1],
+            [1, 1, 1, 1],
+            True,
+            [1, 384, 40, 40],
+        ),
+        (
+            [256, 96, 32, 16],
+            [64, 64, 16, 32],
+            [1, 1, 3, 3],
+            [1, 1, 1, 1],
+            [0, 0, 1, 1],
+            [1, 1, 1, 1],
+            [1, 1, 1, 1],
+            True,
+            [1, 256, 80, 80],
+        ),
+        (
+            [192, 192, 64, 32],
+            [128, 128, 32, 64],
+            [1, 1, 3, 3],
+            [1, 1, 1, 1],
+            [0, 0, 1, 1],
+            [1, 1, 1, 1],
+            [1, 1, 1, 1],
+            True,
+            [1, 192, 40, 40],
+        ),
+        (
+            [384, 384, 128, 128, 128, 64, 64, 64, 64],
+            [256, 256, 64, 64, 128, 64, 64, 64, 64],
+            [1, 1, 1, 1, 1, 3, 3, 3, 3],
+            [1, 1, 1, 1, 1, 1, 1, 1, 1],
+            [0, 0, 0, 0, 0, 1, 1, 1, 1],
+            [1, 1, 1, 1, 1, 1, 1, 1, 1],
+            [1, 1, 1, 1, 1, 1, 1, 1, 1],
+            False,
+            [1, 384, 20, 20],
+        ),
+    ],
+)
+@pytest.mark.parametrize("device_params", [{"l1_small_size": 79104}], indirect=True)
+def test_yolo_v11_c3k2(
+    device,
+    use_program_cache,
+    reset_seeds,
+    in_channel,
+    out_channel,
+    kernel,
+    stride,
+    padding,
+    dilation,
+    groups,
+    is_bk_enabled,
+    fwd_input_shape,
+):
+    """Compare the TTNN C3k2 block against its torch reference.
+
+    The TTNN block is built from parameters created from the torch module.
+    """
+    batch, channels, height, width = fwd_input_shape
+    reference = torch_c3k2(in_channel, out_channel, kernel, stride, padding, dilation, groups, is_bk_enabled)
+    reference.eval()
+    torch_input, tt_input = create_yolov11_input_tensors(
+        device, batch=batch, input_channels=channels, input_height=height, input_width=width
+    )
+    # Here the L1 memory config is applied on transfer, and the tile-layout conversion follows.
+    tt_input = ttnn.to_device(tt_input, device=device, memory_config=ttnn.L1_MEMORY_CONFIG)
+    tt_input = ttnn.to_layout(tt_input, layout=ttnn.TILE_LAYOUT)
+    expected = reference(torch_input)
+    parameters = create_yolov11_model_parameters(reference, torch_input, device=device)
+    tt_block = ttnn_c3k2(
+        device=device, parameter=parameters.conv_args, conv_pt=parameters, is_bk_enabled=is_bk_enabled
+    )
+    actual = ttnn.to_torch(tt_block(x=tt_input, device=device))
+    actual = actual.permute(0, 3, 1, 2).reshape(expected.shape)
+    assert_with_pcc(expected, actual, 0.99)
+
+
+@skip_for_grayskull()
+@pytest.mark.parametrize(
+    "in_channel, out_channel, kernel, stride, padding, dilation, groups,fwd_input_shape",
+    [
+        (
+            [128, 128, 128, 128, 256],
+            [256, 128, 128, 256, 128],
+            [1, 1, 3, 1, 1],
+            [1, 1, 1, 1, 1],
+            [0, 0, 1, 0, 0],
+            [1, 1, 1, 1, 1],
+            [1, 1, 128, 1, 1],
+            [1, 128, 7, 7],
+        ),
+    ],
+)
+@pytest.mark.parametrize("device_params", [{"l1_small_size": 79104}], indirect=True)
+def test_yolo_v11_psa_block(
+    device,
+    use_program_cache,
+    reset_seeds,
+    in_channel,
+    out_channel,
+    kernel,
+    stride,
+    padding,
+    dilation,
+    groups,
+    fwd_input_shape,
+):
+    """Compare the TTNN PSA block against its torch reference.
+
+    The TTNN block is built from parameters created from the torch module.
+    """
+    batch, channels, height, width = fwd_input_shape
+    reference = torch_psa_block(in_channel, out_channel, kernel, stride, padding, dilation, groups)
+    reference.eval()
+    torch_input, tt_input = create_yolov11_input_tensors(
+        device, batch=batch, input_channels=channels, input_height=height, input_width=width
+    )
+    tt_input = ttnn.to_device(tt_input, device=device)
+    tt_input = ttnn.to_layout(tt_input, layout=ttnn.TILE_LAYOUT, memory_config=ttnn.L1_MEMORY_CONFIG)
+    expected = reference(torch_input)
+    parameters = create_yolov11_model_parameters(reference, torch_input, device=device)
+    tt_block = ttnn_psa_block(device=device, parameter=parameters.conv_args, conv_pt=parameters)
+    # The TTNN result is permuted with (0, 3, 1, 2) and reshaped to the torch output's shape before comparing.
+    actual = ttnn.to_torch(tt_block(x=tt_input, device=device))
+    actual = actual.permute(0, 3, 1, 2).reshape(expected.shape)
+    assert_with_pcc(expected, actual, 0.99)
+
+
+@skip_for_grayskull()
+@pytest.mark.parametrize(
+    "in_channel, out_channel, kernel, stride, padding, dilation, groups,fwd_input_shape",
+    [
+        ([256, 512], [128, 256], [1, 1], [1, 1], [0, 0], [1, 1], [1, 1], [1, 256, 20, 20]),
+    ],
+)
+@pytest.mark.parametrize("device_params", [{"l1_small_size": 79104}], indirect=True)
+def test_yolo_v11_sppf(
+    device,
+    use_program_cache,
+    reset_seeds,
+    in_channel,
+    out_channel,
+    kernel,
+    stride,
+    padding,
+    dilation,
+    groups,
+    fwd_input_shape,
+):
+    """Compare the TTNN SPPF block against its torch reference.
+
+    The TTNN block is built from parameters created from the torch module.
+    """
+    batch, channels, height, width = fwd_input_shape
+    reference = torch_sppf(in_channel, out_channel, kernel, stride, padding, dilation, groups)
+    reference.eval()
+    torch_input, tt_input = create_yolov11_input_tensors(
+        device, batch=batch, input_channels=channels, input_height=height, input_width=width
+    )
+    tt_input = ttnn.to_device(tt_input, device=device)
+    tt_input = ttnn.to_layout(tt_input, layout=ttnn.TILE_LAYOUT, memory_config=ttnn.L1_MEMORY_CONFIG)
+    expected = reference(torch_input)
+    parameters = create_yolov11_model_parameters(reference, torch_input, device=device)
+    tt_block = ttnn_sppf(device=device, parameter=parameters.conv_args, conv_pt=parameters)
+    # The TTNN result is permuted with (0, 3, 1, 2) and reshaped to the torch output's shape before comparing.
+    actual = ttnn.to_torch(tt_block(x=tt_input, device=device))
+    actual = actual.permute(0, 3, 1, 2).reshape(expected.shape)
+    assert_with_pcc(expected, actual, 0.99)
+
+
+# Register the local reference module under the ultralytics import paths. Presumably this lets
+# torch.load resolve classes that the checkpoint pickled from those modules (confirm).
+# Assigning into sys.modules cannot raise KeyError, so no exception handling is used;
+# a missing reference module would surface when ``yolov11`` is imported, not here.
+sys.modules["ultralytics"] = yolov11
+sys.modules["ultralytics.nn.tasks"] = yolov11
+sys.modules["ultralytics.nn.modules.conv"] = yolov11
+sys.modules["ultralytics.nn.modules.block"] = yolov11
+sys.modules["ultralytics.nn.modules.head"] = yolov11
+
+
+class Ensemble(nn.ModuleList):
+    """Module list whose forward pass concatenates the first output of every member."""
+
+    def __init__(self):
+        super().__init__()
+
+    def forward(self, x, augment=False):
+        """Call each member with ``(x, augment)``; return their first outputs joined on dim 1, plus None."""
+        outputs = [member(x, augment)[0] for member in self]
+        return torch.cat(outputs, 1), None
+
+
+def attempt_download(file, repo="ultralytics/assets"):
+    """Return the local path for ``file``, downloading ``yolo11n.pt`` to that path if it is missing.
+
+    The GitHub release of ``repo`` is tried first, then ``curl`` (must be installed) against Google Cloud Storage.
+    """
+    import subprocess  # function-scope import: only this helper needs it
+    tests = Path(__file__).parent.parent / "yolov11"
+    file_path = tests / Path(str(file).strip().replace("'", "").lower())
+    if file_path.exists():
+        return file_path
+    name = "yolo11n.pt"  # fixed checkpoint name; file_path.name is not used for the URL
+    msg = f"{file_path} missing, try downloading from https://github.com/{repo}/releases/"
+    try:
+        url = f"https://github.com/{repo}/releases/download/v8.3.0/{name}"
+        print(f"Downloading {url} to {file_path}...")
+        torch.hub.download_url_to_file(url, file_path)
+        if not file_path.exists() or file_path.stat().st_size <= 1e6:  # explicit check: an assert vanishes under -O
+            raise RuntimeError(f"Download failed for {name}")
+    except Exception as e:
+        print(f"Error downloading from GitHub: {e}. Trying secondary source...")
+        url = f"https://storage.googleapis.com/{repo}/ckpt/{name}"
+        print(f"Downloading {url} to {file_path}...")
+        # An argv list bypasses the shell, so spaces or metacharacters in the path cannot alter the command.
+        subprocess.run(["curl", "-L", url, "-o", str(file_path)], check=False)
+        if not file_path.exists() or file_path.stat().st_size < 1e6:
+            file_path.unlink(missing_ok=True)
+            print(f"ERROR: Download failure for {msg}")
+        else:
+            print("Download succeeded from secondary source!")
+    return file_path
+
+
+def attempt_load(weights, map_location=None):
+    """Load one checkpoint or a list of them, returning the models in eval mode as float.
+
+    One checkpoint yields the model itself; several yield an ``Ensemble`` with the last model's names and stride.
+    """
+    ensemble = Ensemble()
+    weight_list = weights if isinstance(weights, list) else [weights]
+    for weight in weight_list:
+        # NOTE(review): torch.load unpickles the file; only use it on trusted checkpoints.
+        ckpt = torch.load(attempt_download(weight), map_location=map_location)
+        ensemble.append(ckpt["ema" if ckpt.get("ema") else "model"].float().eval())
+    for layer in ensemble.modules():
+        if isinstance(layer, (nn.Hardswish, nn.LeakyReLU, nn.ReLU, nn.ReLU6, nn.SiLU)):
+            layer.inplace = True
+        elif isinstance(layer, nn.Upsample):
+            layer.recompute_scale_factor = None
+    if len(ensemble) != 1:
+        for attr in ("names", "stride"):
+            setattr(ensemble, attr, getattr(ensemble[-1], attr))
+        return ensemble
+    return ensemble[-1]
+
+
+@skip_for_grayskull()
+@pytest.mark.parametrize(
+    "in_channel, out_channel, kernel, stride, padding, dilation, groups,fwd_input_shape",
+    [
+        (
+            [64, 64, 64, 128, 64, 64, 256, 64, 64, 64, 64, 80, 80, 80, 128, 128, 80, 80, 80, 256, 256, 80, 80, 80, 16],
+            [64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 80, 80, 80, 80, 128, 80, 80, 80, 80, 256, 80, 80, 80, 80, 1],
+            [3, 3, 1, 3, 3, 1, 3, 3, 1, 3, 1, 3, 1, 1, 3, 1, 3, 1, 1, 3, 1, 3, 1, 1, 1],
+            [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
+            [1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 0, 1, 0, 0, 1, 0, 1, 0, 0, 1, 0, 1, 0, 0, 0],
+            [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
+            [1, 1, 1, 1, 1, 1, 1, 1, 1, 64, 1, 80, 1, 1, 128, 1, 80, 1, 1, 256, 1, 80, 1, 1, 1],
+            [[1, 64, 28, 28], [1, 128, 14, 14], [1, 256, 7, 7]],
+        ),
+    ],
+)
+@pytest.mark.parametrize("device_params", [{"l1_small_size": 79104}], indirect=True)
+def test_yolo_v11_detect(
+    device,
+    use_program_cache,
+    reset_seeds,
+    in_channel,
+    out_channel,
+    kernel,
+    stride,
+    padding,
+    dilation,
+    groups,
+    fwd_input_shape,
+):
+    """Compare the TTNN Detect head against its torch reference on three inputs.
+
+    Args:
+        device: Device fixture (``device_params`` is parametrized indirectly on this test).
+        use_program_cache: Fixture that is requested but not referenced in the body.
+        reset_seeds: Fixture that is requested but not referenced in the body.
+        in_channel: List forwarded to ``torch_detect`` (presumably one entry per convolution).
+        out_channel: List forwarded to ``torch_detect`` in the same way.
+        kernel: List forwarded to ``torch_detect`` in the same way.
+        stride: List forwarded to ``torch_detect`` in the same way.
+        padding: List forwarded to ``torch_detect`` in the same way.
+        dilation: List forwarded to ``torch_detect`` in the same way.
+        groups: List forwarded to ``torch_detect`` in the same way.
+        fwd_input_shape: Three shapes, each indexed as batch, channels, height, width.
+    """
+    reference = torch_detect(in_channel, out_channel, kernel, stride, padding, dilation, groups)
+    reference.eval()
+    # All (torch, ttnn) input pairs are created before any of them is moved to the device.
+    pairs = [
+        create_yolov11_input_tensors(
+            device, batch=shape[0], input_channels=shape[1], input_height=shape[2], input_width=shape[3]
+        )
+        for shape in fwd_input_shape
+    ]
+    torch_inputs = [torch_input for torch_input, _ in pairs]
+    tt_inputs = []
+    for _, tt_input in pairs:
+        tt_input = ttnn.to_device(tt_input, device=device)
+        tt_input = ttnn.to_layout(tt_input, layout=ttnn.TILE_LAYOUT, memory_config=ttnn.L1_MEMORY_CONFIG)
+        tt_inputs.append(tt_input)
+    expected = reference(*torch_inputs)
+    parameters = create_yolov11_model_parameters_detect(reference, *torch_inputs, device=device)
+    tt_head = ttnn_detect(device=device, parameter=parameters.model, conv_pt=parameters)
+
+    y1, y2, y3 = tt_inputs
+    actual = ttnn.to_torch(tt_head(y1=y1, y2=y2, y3=y3, device=device))
+    # The head's output is only reshaped (no permute) to the torch output's shape.
+    actual = actual.reshape(expected.shape)
+    assert_with_pcc(expected, actual, 0.99)
+
+
+@skip_for_grayskull()
+@pytest.mark.parametrize(
+    "resolution",
+    [
+        [1, 3, 224, 224],
+        [1, 3, 640, 640],
+    ],
+)
+@pytest.mark.parametrize(
+    "use_pretrained_weight",
+    [
+        False,
+        # True,  # uncomment to also run the model with the pretrained checkpoint weights
+    ],
+    ids=[
+        "pretrained_weight_false",
+        # "pretrained_weight_true",  # uncomment together with the True case above
+    ],
+)
+@pytest.mark.parametrize("device_params", [{"l1_small_size": 79104}], indirect=True)
+def test_yolov11(device, use_program_cache, reset_seeds, resolution, use_pretrained_weight):
+    """Run the full YOLOv11 model through torch and TTNN and compare the two outputs.
+
+    With ``use_pretrained_weight`` the reference model takes values from the ``yolo11n.pt`` checkpoint,
+    paired with its own state-dict keys by position; otherwise it keeps its freshly constructed weights.
+    """
+    batch, channels, height, width = resolution
+    torch_input, ttnn_input = create_yolov11_input_tensors(
+        device, batch=batch, input_channels=channels, input_height=height, input_width=width
+    )
+    torch_model = yolov11.YoloV11()
+    if use_pretrained_weight:
+        pretrained = attempt_load("yolo11n.pt", map_location="cpu").state_dict()
+        # Entries are matched by order, not by name; only torch.FloatTensor values are copied over.
+        remapped = {
+            local_name: value
+            for local_name, value in zip(torch_model.state_dict(), pretrained.values())
+            if isinstance(value, torch.FloatTensor)
+        }
+        torch_model.load_state_dict(remapped)
+    torch_model.eval()
+    expected = torch_model(torch_input)
+    parameters = create_yolov11_model_parameters(torch_model, torch_input, device=device)
+    tt_model = ttnn_yolov11.YoloV11(device, parameters)
+    actual = ttnn.to_torch(tt_model(ttnn_input))
+    assert_with_pcc(expected, actual.reshape(expected.shape), 0.99)
diff --git a/tests/ttnn/integration_tests/yolov11/test_ttnn_yolov11.py b/tests/ttnn/integration_tests/yolov11/test_ttnn_yolov11.py
deleted file mode 100644
index f694d9081b9..00000000000
--- a/tests/ttnn/integration_tests/yolov11/test_ttnn_yolov11.py
+++ /dev/null
@@ -1,89 +0,0 @@
-# SPDX-FileCopyrightText: © 2025 Tenstorrent Inc.
-
-# SPDX-License-Identifier: Apache-2.0
-
-import pytest
-import ttnn
-import torch
-from tests.ttnn.utils_for_testing import assert_with_pcc
-import sys
-
-from models.experimental.functional_yolov11.reference import yolov11
-
-from models.experimental.functional_yolov11.tt.model_preprocessing import (
-    create_yolov11_input_tensors,
-    create_yolov11_model_parameters,
-)
-from models.experimental.functional_yolov11.tt import ttnn_yolov11
-import torch.nn as nn
-
-try:
-    sys.modules["ultralytics"] = yolov11
-    sys.modules["ultralytics.nn.tasks"] = yolov11
-    sys.modules["ultralytics.nn.modules.conv"] = yolov11
-    sys.modules["ultralytics.nn.modules.block"] = yolov11
-    sys.modules["ultralytics.nn.modules.head"] = yolov11
-
-except KeyError:
-    print("models.experimental.functional_yolov11.reference.yolov11 not found.")
-
-
-class Ensemble(nn.ModuleList):
-    def __init__(self):
-        super(Ensemble, self).__init__()
-
-    def forward(self, x, augment=False):
-        y = []
-        for module in self:
-            y.append(module(x, augment)[0])
-        y = torch.cat(y, 1)
-        return y, None
-
-
-def attempt_load(weights, map_location=None):
-    model = Ensemble()
-    for w in weights if isinstance(weights, list) else [weights]:
-        w = "models/experimental/functional_yolov11/reference/yolo11n.pt"
-        ckpt = torch.load(w, map_location=map_location)
-        model.append(ckpt["ema" if ckpt.get("ema") else "model"].float().eval())
-    for m in model.modules():
-        if type(m) in [nn.Hardswish, nn.LeakyReLU, nn.ReLU, nn.ReLU6, nn.SiLU]:
-            m.inplace = True
-        elif type(m) is nn.Upsample:
-            m.recompute_scale_factor = None
-
-    if len(model) == 1:
-        return model[-1]
-    else:
-        for k in ["names", "stride"]:
-            setattr(model, k, getattr(model[-1], k))
-        return model
-
-
-@pytest.mark.parametrize("device_params", [{"l1_small_size": 79104}], indirect=True)
-def test_yolov11(device, use_program_cache, reset_seeds):
-    torch_input, ttnn_input = create_yolov11_input_tensors(device, input_channels=3, input_height=640, input_width=640)
-
-    torch_model = attempt_load("yolov11n.pt", map_location="cpu")
-    state_dict = torch_model.state_dict()
-    torch_model = yolov11.YoloV11()
-    ds_state_dict = {k: v for k, v in state_dict.items()}
-    new_state_dict = {}
-    for (name1, parameter1), (name2, parameter2) in zip(torch_model.state_dict().items(), ds_state_dict.items()):
-        if isinstance(parameter2, torch.FloatTensor):
-            new_state_dict[name1] = parameter2
-    torch_model.load_state_dict(new_state_dict)
-    torch_model.eval()
-
-    torch_output = torch_model(torch_input)
-    parameters = create_yolov11_model_parameters(torch_model, torch_input, device=device)
-    ttnn_model = ttnn_yolov11.YoloV11(device, parameters)
-    ttnn_output = ttnn_model(ttnn_input)
-    # l1 = torch.load("/home/ubuntu/venkatesh_yolov11/tt-metal/models/experimental/functional_yolov11/dumps/torch_out.pth")
-    # l1 = torch.load("/home/ubuntu/venkatesh_yolov11/tt-metal/models/experimental/functional_yolov11/dumps/tt_out.pth")
-    # assert_with_pcc(l1, l2, 0.99)
-    ttnn_output = ttnn.to_torch(ttnn_output)
-    # ttnn_output = ttnn_output.permute(0, 2, 1)
-    print(ttnn_output.shape, torch_output.shape)
-    ttnn_output = ttnn_output.reshape(torch_output.shape)
-    assert_with_pcc(torch_output, ttnn_output, 0.99999)
diff --git a/tests/ttnn/unit_tests/operations/test_concat.py b/tests/ttnn/unit_tests/operations/test_concat.py
index d8abf381500..397b6b74598 100644
--- a/tests/ttnn/unit_tests/operations/test_concat.py
+++ b/tests/ttnn/unit_tests/operations/test_concat.py
@@ -53,108 +53,112 @@ def test_concat(device, height, width, dim, async_mode):
 
 
 @pytest.mark.parametrize(
-    "inputs, output_shard_shape, shard_grid, strategy, layout, cache_mode,dtype",
+    "inputs, output_shard_shape, shard_grid, strategy, layout, cache_mode",
     (
-        # (
-        #     [((1,1,49,128), (1,128)), ((1,1,49,128), (1,128)),((1,1,49,128), (1,128)),((1,1,49,128), (1,128))],
-        #     (1,512),
-        #     ttnn.CoreRangeSet({ttnn.CoreRange(ttnn.CoreCoord(0, 0), ttnn.CoreCoord(7,7))}),
-        #      ttnn.ShardStrategy.HEIGHT,
-        #      ttnn.ROW_MAJOR_LAYOUT,
-        #      False,
-        #      ttnn.bfloat16
-        # ),
-        #  (
-        #     [((1,1,400,128), (7,128)), ((1,1,400,128), (7,128)),((1,1,400,128), (7,128)),((1,1,400,128), (7,128))],
-        #     (7,512),
-        #     ttnn.CoreRangeSet({ttnn.CoreRange(ttnn.CoreCoord(0, 0), ttnn.CoreCoord(7,7))}),
-        #      ttnn.ShardStrategy.HEIGHT,
-        #      ttnn.ROW_MAJOR_LAYOUT,
-        #      False,
-        #      ttnn.bfloat16
-        # ),
-        # (  # fp8 case #C3K
-        #     [((1, 1, 49, 64), (1, 64)), ((1, 1, 49, 64), (1, 64))],
-        #     (1, 128),
-        #     ttnn.CoreRangeSet({ttnn.CoreRange(ttnn.CoreCoord(0, 0), ttnn.CoreCoord(7, 7))}),
-        #     ttnn.ShardStrategy.HEIGHT,
-        #     ttnn.TILE_LAYOUT,
-        #     False,
-        #     ttnn.bfloat8_b
-        # ),
-        # ( #C3K
-        #     [((1, 1, 196, 32), (4, 32)), ((1, 1, 196, 32), (4, 32))],
-        #     (4, 64),
-        #     ttnn.CoreRangeSet({ttnn.CoreRange(ttnn.CoreCoord(0, 0), ttnn.CoreCoord(7, 7))}),
-        #     ttnn.ShardStrategy.HEIGHT,
-        #     ttnn.TILE_LAYOUT,
-        #     False,
-        #     ttnn.bfloat8_b
-        # ),
-        # c3k2
-        # (
-        #     [((1, 1, 25600, 16), (400, 16)), ((1, 1, 25600, 16), (400, 16)), ((1, 1, 25600, 16), (400, 16))],
-        #     (400, 48),
-        #     ttnn.CoreRangeSet({ttnn.CoreRange(ttnn.CoreCoord(0, 0), ttnn.CoreCoord(7, 7))}),
-        #     ttnn.ShardStrategy.HEIGHT,
-        #     ttnn.ROW_MAJOR_LAYOUT,
-        #     False,
-        #     ttnn.bfloat16,
-        # ),
-        # (
-        #     [((1, 1, 25600, 16), (400, 16)), ((1, 1, 25600, 16), (400, 16)), ((1, 1, 25600, 16), (400, 16))],
-        #     (400, 48),
-        #     ttnn.CoreRangeSet({ttnn.CoreRange(ttnn.CoreCoord(0, 0), ttnn.CoreCoord(7, 7))}),
-        #     ttnn.ShardStrategy.HEIGHT,
-        #     ttnn.ROW_MAJOR_LAYOUT,
-        #     False,
-        #     ttnn.bfloat16,
-        # ),
-        # (
-        #     [((1, 1, 6400, 32), (102, 32)), ((1, 1, 6400, 32), (102, 32)), ((1, 1, 6400, 32), (102, 32))],
-        #     (102, 96),
-        #     ttnn.CoreRangeSet({ttnn.CoreRange(ttnn.CoreCoord(0, 0), ttnn.CoreCoord(7, 7))}),
-        #     ttnn.ShardStrategy.HEIGHT,
-        #     ttnn.ROW_MAJOR_LAYOUT,
-        #     False,
-        #     ttnn.bfloat16,
-        # ),
-        # (
-        #     [((1, 1, 1600, 64), (25, 64)), ((1, 1, 1600, 64), (25, 64)), ((1, 1, 1600, 64), (25, 64))],
-        #     (25, 192),
-        #     ttnn.CoreRangeSet({ttnn.CoreRange(ttnn.CoreCoord(0, 0), ttnn.CoreCoord(7, 7))}),
-        #     ttnn.ShardStrategy.HEIGHT,
-        #     ttnn.ROW_MAJOR_LAYOUT,
-        #     False,
-        #     ttnn.bfloat16,
-        # ),
         (
-            [
-                ((1, 1, 1600, 256), (25, 256)),
-                ((1, 1, 1600, 128), (25, 256)),
-            ],
-            (25, 384),
-            ttnn.CoreRangeSet({ttnn.CoreRange(ttnn.CoreCoord(0, 0), ttnn.CoreCoord(7, 7))}),
+            [((1, 1, 160, 32), (80, 32)), ((1, 1, 160, 32), (80, 32))],
+            (80, 64),
+            ttnn.CoreRangeSet({ttnn.CoreRange(ttnn.CoreCoord(0, 0), ttnn.CoreCoord(0, 1))}),
             ttnn.ShardStrategy.HEIGHT,
             ttnn.ROW_MAJOR_LAYOUT,
             False,
-            ttnn.bfloat16,
         ),
-        # (
-        #     [((1, 1, 6400, 128), (102, 128)), ((1, 1, 6400, 128), (102, 128)) ],
-        #     (102, 256),
-        #     ttnn.CoreRangeSet({ttnn.CoreRange(ttnn.CoreCoord(0, 0), ttnn.CoreCoord(7, 7))}),
-        #     ttnn.ShardStrategy.HEIGHT,
-        #     ttnn.ROW_MAJOR_LAYOUT,
-        #     False,
-        #     ttnn.bfloat16,
-        # ),
+        (
+            [((1, 1, 160, 32), (80, 32)), ((1, 1, 160, 16), (80, 16))],
+            (80, 48),
+            ttnn.CoreRangeSet({ttnn.CoreRange(ttnn.CoreCoord(0, 0), ttnn.CoreCoord(0, 1))}),
+            ttnn.ShardStrategy.HEIGHT,
+            ttnn.ROW_MAJOR_LAYOUT,
+            False,
+        ),
+        (
+            [((1, 1, 25600, 64), (512, 64)), ((1, 1, 25600, 64), (512, 64))],
+            (512, 128),
+            ttnn.CoreRangeSet(
+                {
+                    ttnn.CoreRange(ttnn.CoreCoord(0, 0), ttnn.CoreCoord(7, 5)),
+                    ttnn.CoreRange(ttnn.CoreCoord(0, 6), ttnn.CoreCoord(1, 6)),
+                }
+            ),
+            ttnn.ShardStrategy.HEIGHT,
+            ttnn.ROW_MAJOR_LAYOUT,
+            False,
+        ),
+        pytest.param(
+            [((1, 1, 25600, 64), (512, 64)), ((1, 1, 25600, 64), (512, 64))],
+            (512, 128),
+            ttnn.CoreRangeSet(
+                {
+                    ttnn.CoreRange(ttnn.CoreCoord(0, 0), ttnn.CoreCoord(7, 5)),
+                    ttnn.CoreRange(ttnn.CoreCoord(0, 6), ttnn.CoreCoord(1, 6)),
+                }
+            ),
+            ttnn.ShardStrategy.HEIGHT,
+            ttnn.ROW_MAJOR_LAYOUT,
+            True,
+        ),
+        (
+            [((1, 1, 16, 16), (8, 16)), ((1, 1, 16, 16), (8, 16)), ((1, 1, 16, 16), (8, 16))],
+            (8, 48),
+            ttnn.CoreRangeSet({ttnn.CoreRange(ttnn.CoreCoord(0, 0), ttnn.CoreCoord(0, 1))}),
+            ttnn.ShardStrategy.HEIGHT,
+            ttnn.ROW_MAJOR_LAYOUT,
+            False,
+        ),
+        (
+            [((1, 1, 16, 16), (8, 16)), ((1, 1, 16, 16), (8, 16)), ((1, 1, 16, 16), (8, 16))],
+            (8, 48),
+            ttnn.CoreRangeSet({ttnn.CoreRange(ttnn.CoreCoord(0, 0), ttnn.CoreCoord(0, 1))}),
+            ttnn.ShardStrategy.HEIGHT,
+            ttnn.ROW_MAJOR_LAYOUT,
+            True,
+        ),
+        (
+            [((1, 1, 8, 64), (8, 16)), ((1, 1, 7, 64), (7, 16)), ((1, 1, 23, 64), (23, 16))],
+            (38, 16),
+            ttnn.CoreRangeSet({ttnn.CoreRange(ttnn.CoreCoord(0, 0), ttnn.CoreCoord(0, 3))}),
+            ttnn.ShardStrategy.WIDTH,
+            ttnn.ROW_MAJOR_LAYOUT,
+            False,
+        ),
+        (
+            [((1, 1, 8, 64), (8, 16)), ((1, 1, 7, 64), (7, 16)), ((1, 1, 23, 64), (23, 16))],
+            (38, 16),
+            ttnn.CoreRangeSet({ttnn.CoreRange(ttnn.CoreCoord(0, 0), ttnn.CoreCoord(0, 3))}),
+            ttnn.ShardStrategy.WIDTH,
+            ttnn.ROW_MAJOR_LAYOUT,
+            True,
+        ),
+        (
+            [((1, 1, 256, 96), (64, 96)), ((1, 1, 256, 64), (64, 64)), ((1, 1, 256, 32), (64, 32))],
+            (64, 192),
+            ttnn.CoreRangeSet(
+                {
+                    ttnn.CoreRange(ttnn.CoreCoord(0, 0), ttnn.CoreCoord(0, 1)),
+                    ttnn.CoreRange(ttnn.CoreCoord(1, 0), ttnn.CoreCoord(2, 0)),
+                }
+            ),
+            ttnn.ShardStrategy.HEIGHT,
+            ttnn.TILE_LAYOUT,
+            False,
+        ),
+        (
+            [((1, 1, 32, 512), (32, 64)), ((1, 1, 64, 512), (64, 64)), ((1, 1, 96, 512), (96, 64))],
+            (192, 64),
+            ttnn.CoreRangeSet(
+                {
+                    ttnn.CoreRange(ttnn.CoreCoord(0, 0), ttnn.CoreCoord(0, 3)),
+                    ttnn.CoreRange(ttnn.CoreCoord(1, 0), ttnn.CoreCoord(2, 1)),
+                }
+            ),
+            ttnn.ShardStrategy.WIDTH,
+            ttnn.TILE_LAYOUT,
+            False,
+        ),
     ),
 )
 @pytest.mark.parametrize("async_mode", [True, False], ids=["async_on", "async_off"])
-def test_sharded_concat(
-    device, inputs, output_shard_shape, shard_grid, strategy, layout, cache_mode, async_mode, dtype
-):
+def test_sharded_concat(device, inputs, output_shard_shape, shard_grid, strategy, layout, cache_mode, async_mode):
     device.enable_async(async_mode)
     if cache_mode:
         device.enable_program_cache()
@@ -172,7 +176,7 @@ def _gen_inputs(input_specs):
                 use_height_and_width_as_shard_shape=True,
             )
             torch_input_tensor = torch.rand(shape, dtype=torch.bfloat16)
-            input_tensor = ttnn.from_torch(torch_input_tensor, dtype=dtype, layout=layout, device=device)
+            input_tensor = ttnn.from_torch(torch_input_tensor, layout=layout, device=device)
             input_tensor = ttnn.to_memory_config(input_tensor, input_sharded_memory_config)
             input_tensors.append((torch_input_tensor, input_tensor))
         return input_tensors