diff --git a/models/experimental/functional_yolov11/README.md b/models/experimental/functional_yolov11/README.md new file mode 100644 index 00000000000..1f95c0c012d --- /dev/null +++ b/models/experimental/functional_yolov11/README.md @@ -0,0 +1,36 @@ +## YOLOv11n - Model + +#### Introduction + +**YOLOv11** is the latest iteration in the YOLO series, bringing cutting-edge improvements in accuracy, speed, and efficiency for real-time object detection. Building on the success of previous versions, YOLOv11 introduces enhanced architecture and optimized training methods, making it a versatile solution for a wide range of computer vision tasks, from object detection to image classification and pose estimation. + +#### Model Details + +* The entry point to the YOLOv11 model is located in: +`models/experimental/functional_yolov11/tt/ttnn_yolov11.py` + +* The model picks up weights from the **yolo11n.pt** file located in: +`models/experimental/functional_yolov11/reference/yolo11n.pt` + +#### Batch Size: +* Set to 1 by default. +* Batch size determines the number of input images processed simultaneously during training or inference, impacting computational efficiency and memory usage. +* It's recommended to keep the batch size at **1** for optimal performance. + +#### Running YOLOv11 Demo +* To run the YOLOv11 demo for different resolutions (**224x224** and **640x640**), use the following command: +`pytest --disable-warnings models/experimental/functional_yolov11/demo/demo.py` + +#### Input Data +* By default, the demo will receive inputs from the `models/experimental/functional_yolov11/demo/images` directory. To test the model on different input data, add new image files to this directory and list their paths in the `source` parametrization of `demo.py`. + +#### Output Data + +* The output from the model will be saved in a **runs** folder created inside: +`models/experimental/functional_yolov11/demo/` +* For reference: +The torch model output will be stored in the **torch_model** directory. 
+The TTNN model output will be stored in the **tt_model** directory. + +#### Pending Issues: +* [#17835](https://github.com/tenstorrent/tt-metal/issues/17835) - Tracing fails in YOLOv11n model diff --git a/models/experimental/functional_yolov11/demo/demo.py b/models/experimental/functional_yolov11/demo/demo.py index bc9dd3eaf4b..2da75fa9978 100644 --- a/models/experimental/functional_yolov11/demo/demo.py +++ b/models/experimental/functional_yolov11/demo/demo.py @@ -2,6 +2,7 @@ # SPDX-License-Identifier: Apache-2.0 +from pathlib import Path import os import cv2 import sys @@ -11,17 +12,16 @@ import torch.nn as nn from loguru import logger from datetime import datetime -from functools import partial from models.utility_functions import disable_persistent_kernel_cache from models.experimental.functional_yolov11.reference import yolov11 - +from models.experimental.functional_yolov11.reference.yolov11 import attempt_load from models.experimental.functional_yolov11.tt import ttnn_yolov11 - from models.experimental.functional_yolov11.tt.model_preprocessing import ( create_yolov11_input_tensors, create_yolov11_model_parameters, ) from models.experimental.functional_yolov11.demo.demo_utils import LoadImages, preprocess, postprocess +from models.utility_functions import skip_for_grayskull try: sys.modules["ultralytics"] = yolov11 @@ -33,38 +33,6 @@ print("models.experimental.functional_yolov11.reference.yolov11 not found.") -class Ensemble(nn.ModuleList): - def __init__(self): - super(Ensemble, self).__init__() - - def forward(self, x, augment=False): - y = [] - for module in self: - y.append(module(x, augment)[0]) - y = torch.cat(y, 1) - return y, None - - -def attempt_load(weights, map_location=None): - model = Ensemble() - for w in weights if isinstance(weights, list) else [weights]: - w = "models/experimental/functional_yolov11/reference/yolo11n.pt" - ckpt = torch.load(w, map_location=map_location) - model.append(ckpt["ema" if ckpt.get("ema") else 
"model"].float().eval()) - for m in model.modules(): - if type(m) in [nn.Hardswish, nn.LeakyReLU, nn.ReLU, nn.ReLU6, nn.SiLU]: - m.inplace = True - elif type(m) is nn.Upsample: - m.recompute_scale_factor = None - - if len(model) == 1: - return model[-1] - else: - for k in ["names", "stride"]: - setattr(model, k, getattr(model[-1], k)) - return model - - def save_yolo_predictions_by_model(result, save_dir, image_path, model_name): model_save_dir = os.path.join(save_dir, model_name) os.makedirs(model_save_dir, exist_ok=True) @@ -99,53 +67,61 @@ def save_yolo_predictions_by_model(result, save_dir, image_path, model_name): print(f"Predictions saved to {output_path}") +@skip_for_grayskull() +@pytest.mark.parametrize( + "use_pretrained_weight", + [ + False, + # True # uncomment to run the model for real weights + ], + ids=[ + "pretrained_weight_false", + # "pretrained_weight_true", # uncomment to run the model for real weights + ], +) @pytest.mark.parametrize("device_params", [{"l1_small_size": 32768}], indirect=True) @pytest.mark.parametrize( - "source, model_type", + "source, model_type,resolution", [ - ("models/experimental/functional_yolov11/demo/images/cycle_girl.jpg", "torch_model"), - ("models/experimental/functional_yolov11/demo/images/cycle_girl.jpg", "tt_model"), - ("models/experimental/functional_yolov11/demo/images/dog.jpg", "torch_model"), - ("models/experimental/functional_yolov11/demo/images/dog.jpg", "tt_model"), + # 224*224 + # ("models/experimental/functional_yolov11/demo/images/cycle_girl.jpg", "torch_model", [3, 224, 224]), + # ("models/experimental/functional_yolov11/demo/images/cycle_girl.jpg", "tt_model", [3, 224, 224]), + # ("models/experimental/functional_yolov11/demo/images/dog.jpg", "torch_model", [3, 224, 224]), + # ("models/experimental/functional_yolov11/demo/images/dog.jpg", "tt_model", [3, 224, 224]), + # 640*640 + # ("models/experimental/functional_yolov11/demo/images/cycle_girl.jpg", "torch_model", [3, 640, 640]), + 
("models/experimental/functional_yolov11/demo/images/cycle_girl.jpg", "tt_model", [3, 640, 640]), + # ("models/experimental/functional_yolov11/demo/images/dog.jpg", "torch_model", [3, 640, 640]), + # ("models/experimental/functional_yolov11/demo/images/dog.jpg", "tt_model", [3, 640, 640]), ], ) -def test_demo(device, source, model_type): +def test_demo(device, source, model_type, resolution, use_pretrained_weight): disable_persistent_kernel_cache() - - if model_type == "torch_model": - model = attempt_load("models/experimental/functional_yolov11/reference/yolo11n.pt", map_location="cpu") - state_dict = model.state_dict() - model = yolov11.YoloV11() + model = yolov11.YoloV11() + if use_pretrained_weight: + logger.info(f"Demo Inferencing with Pre-trained Weights") + state_dict = attempt_load("yolo11n.pt", map_location="cpu").state_dict() ds_state_dict = {k: v for k, v in state_dict.items()} new_state_dict = {} for (name1, parameter1), (name2, parameter2) in zip(model.state_dict().items(), ds_state_dict.items()): if isinstance(parameter2, torch.FloatTensor): new_state_dict[name1] = parameter2 model.load_state_dict(new_state_dict) + else: + logger.info(f"Demo Inferencing with Random Weights") + if model_type == "torch_model": model.eval() logger.info("Inferencing using Torch Model") else: torch_input, ttnn_input = create_yolov11_input_tensors( - device, input_channels=3, input_height=224, input_width=224 + device, input_channels=resolution[0], input_height=resolution[1], input_width=resolution[2] ) - torch_model = attempt_load("models/experimental/functional_yolov11/reference/yolo11n.pt", map_location="cpu") - state_dict = torch_model.state_dict() - torch_model = yolov11.YoloV11() - ds_state_dict = {k: v for k, v in state_dict.items()} - new_state_dict = {} - for (name1, parameter1), (name2, parameter2) in zip(torch_model.state_dict().items(), ds_state_dict.items()): - if isinstance(parameter2, torch.FloatTensor): - new_state_dict[name1] = parameter2 - 
torch_model.load_state_dict(new_state_dict) - torch_model.eval() - parameters = create_yolov11_model_parameters(torch_model, torch_input, device=device) + parameters = create_yolov11_model_parameters(model, torch_input, device=device) model = ttnn_yolov11.YoloV11(device, parameters) logger.info("Inferencing using ttnn Model") save_dir = "models/experimental/functional_yolov11/demo/runs" - dataset = LoadImages(path=source) - model_save_dir = os.path.join(save_dir, model_type) os.makedirs(model_save_dir, exist_ok=True) @@ -234,10 +210,9 @@ def test_demo(device, source, model_type): for batch in dataset: paths, im0s, s = batch - im = preprocess(im0s) + im = preprocess(im0s, resolution) if model_type == "torch_model": preds = model(im) - print("preds in torch", preds.shape) else: img = torch.permute(im, (0, 2, 3, 1)) img = img.reshape( @@ -247,16 +222,8 @@ def test_demo(device, source, model_type): img.shape[3], ) ttnn_im = ttnn.from_torch(img, layout=ttnn.TILE_LAYOUT, dtype=ttnn.bfloat8_b) - # ttnn_im = ttnn.from_torch(img, layout=ttnn.ROW_MAJOR_LAYOUT, dtype=ttnn.bfloat16) - # print("input tensor in demo",ttnn_im.shape,ttnn_im.dtype,ttnn_im.memory_config(),ttnn_im.layout) preds = model(x=ttnn_im) preds = ttnn.to_torch(preds, dtype=torch.float32) - print("preds in ttnn", preds.shape) - results = postprocess(preds, im, im0s, batch, names)[0] - save_yolo_predictions_by_model(results, save_dir, source, model_type) - # input tensor in demo Shape([1, 1, 409600, 3]) DataType.BFLOAT8_B MemoryConfig(memory_layout=TensorMemoryLayout::INTERLEAVED,buffer_type=BufferType::DRAM,shard_spec=std::nullopt) Layout.TILE - - # input tensor in demo Shape([1, 1, 409600, 3]) DataType.BFLOAT16 MemoryConfig(memory_layout=TensorMemoryLayout::INTERLEAVED,buffer_type=BufferType::DRAM,shard_spec=std::nullopt) Layout.ROW_MAJOR print("Inference done") diff --git a/models/experimental/functional_yolov11/demo/demo_utils.py b/models/experimental/functional_yolov11/demo/demo_utils.py index 
5701b1ada36..d603baae08f 100644 --- a/models/experimental/functional_yolov11/demo/demo_utils.py +++ b/models/experimental/functional_yolov11/demo/demo_utils.py @@ -114,15 +114,19 @@ def LetterBox(img, new_shape=(224, 224), auto=False, scaleFill=False, scaleup=Tr return img -def pre_transform(im): - return [LetterBox(img=x) for x in im] +def pre_transform(im, LetterBox_shape=(224, 224)): + return [LetterBox(img=x, new_shape=LetterBox_shape) for x in im] -def preprocess(im): +def preprocess(im, resolution): device = "cpu" not_tensor = not isinstance(im, torch.Tensor) if not_tensor: - im = np.stack(pre_transform(im)) + if resolution[1] == 224: + LetterBox_shape = (224, 224) + else: + LetterBox_shape = (640, 640) + im = np.stack(pre_transform(im, LetterBox_shape)) im = im[..., ::-1].transpose((0, 3, 1, 2)) im = np.ascontiguousarray(im) im = torch.from_numpy(im) diff --git a/models/experimental/functional_yolov11/demo/runs/torch_model/prediction_torch_224_1.jpg b/models/experimental/functional_yolov11/demo/runs/torch_model/prediction_torch_224_1.jpg new file mode 100644 index 00000000000..c0fb3dec911 Binary files /dev/null and b/models/experimental/functional_yolov11/demo/runs/torch_model/prediction_torch_224_1.jpg differ diff --git a/models/experimental/functional_yolov11/demo/runs/torch_model/prediction_torch_640_1.jpg b/models/experimental/functional_yolov11/demo/runs/torch_model/prediction_torch_640_1.jpg new file mode 100644 index 00000000000..03d33858ddf Binary files /dev/null and b/models/experimental/functional_yolov11/demo/runs/torch_model/prediction_torch_640_1.jpg differ diff --git a/models/experimental/functional_yolov11/demo/runs/tt_model/prediction_ttnn_224_1.jpg b/models/experimental/functional_yolov11/demo/runs/tt_model/prediction_ttnn_224_1.jpg new file mode 100644 index 00000000000..aeacb0426be Binary files /dev/null and b/models/experimental/functional_yolov11/demo/runs/tt_model/prediction_ttnn_224_1.jpg differ diff --git 
a/models/experimental/functional_yolov11/demo/runs/tt_model/prediction_ttnn_640_1.jpg b/models/experimental/functional_yolov11/demo/runs/tt_model/prediction_ttnn_640_1.jpg new file mode 100644 index 00000000000..e4a4a4481b3 Binary files /dev/null and b/models/experimental/functional_yolov11/demo/runs/tt_model/prediction_ttnn_640_1.jpg differ diff --git a/models/experimental/functional_yolov11/readme.md b/models/experimental/functional_yolov11/readme.md deleted file mode 100644 index c11208a12eb..00000000000 --- a/models/experimental/functional_yolov11/readme.md +++ /dev/null @@ -1,11 +0,0 @@ -# Current status - -Facing ```ModuleNotFoundError: No module named 'ultralytics'``` issue while loading the weights file, so implemented with random weights -- Finished reference implementation. -- Completed ttnn implementation of Conv, Bottleneck, SPPF, C3K, C3K2 - -## PCC: -Conv - 0.99
-C3K2 with bottleneck - 0.99
-C3K2 with C3k - facing shape mismatch error
-SPPF - Not checked yet
diff --git a/models/experimental/functional_yolov11/reference/yolov11.py b/models/experimental/functional_yolov11/reference/yolov11.py index 2b34b89a651..3d26a2a2dec 100644 --- a/models/experimental/functional_yolov11/reference/yolov11.py +++ b/models/experimental/functional_yolov11/reference/yolov11.py @@ -1,14 +1,15 @@ -# SPDX-FileCopyrightText: © 2023 Tenstorrent Inc. +# SPDX-FileCopyrightText: © 2025 Tenstorrent Inc. # SPDX-License-Identifier: Apache-2.0 +from pathlib import Path +import os import torch import torch.nn as nn import torch.nn.functional as f import math - -# from torchview import draw_graph import torch +import time def make_anchors(feats, strides, grid_cell_offset=0.5): @@ -204,7 +205,6 @@ def __init__(self, in_channel, out_channel, kernel, stride, padding, dilation, g ) def forward(self, x): - # x1 = self.cv1(x) x2 = self.cv2(x) x = self.m(x1) @@ -216,9 +216,6 @@ def forward(self, x): class C3k2(nn.Module): def __init__(self, in_channel, out_channel, kernel, stride, padding, dilation, groups, is_bk_enabled=False): super().__init__() - # - # f"c3k2 init is called,{in_channel}, {out_channel}, {kernel},{stride}, {padding}, {dilation}, {groups},{is_bk_enabled}" - # ) self.is_bk_enabled = is_bk_enabled if is_bk_enabled: self.cv1 = Conv( @@ -714,31 +711,22 @@ def forward(self, y1, y2, y3): ya = torch.reshape(ya, (ya.shape[0], int(ya.shape[1] / self.in_channel[24]), self.in_channel[24], ya.shape[2])) ya = torch.permute(ya, (0, 2, 1, 3)) - ya = f.softmax(ya, dim=1) # torch.Size([1, 16, 4, 1029]) + ya = f.softmax(ya, dim=1) c = self.dfl(ya) c1 = torch.reshape(c, (c.shape[0], c.shape[1] * c.shape[2], c.shape[3])) c2 = c1 c1 = c1[:, 0:2, :] c2 = c2[:, 2:4, :] - anchor, strides = (y_all.transpose(0, 1) for y_all in make_anchors(y_all, [8, 16, 32], 0.5)) anchor.unsqueeze(0) - c1 = anchor - c1 c2 = anchor + c2 - - # print(c1.shape, c2.shape) - z1 = c2 - c1 z2 = c1 + c2 - z2 = z2 / 2 - z = torch.concat((z2, z1), 1) z = z * strides - # yb = torch.load("yb.pt") 
yb = torch.sigmoid(yb) - # return yb out = torch.concat((z, yb), 1) return out @@ -890,24 +878,19 @@ def forward(self, x): x = self.model[2](x) # 2 x = self.model[3](x) # 3 x = self.model[4](x) # 4 - # torch.save(x, "/home/ubuntu/tt-metal/models/experimental/functional_yolov11/dumps/torch_out.pth") x4 = x x = self.model[5](x) # 5 x = self.model[6](x) # 6 x6 = x x = self.model[7](x) # 7 x = self.model[8](x) # 8 - x = self.model[9](x) # 9 x = self.model[10](x) # 10 - # torch.save(x,"/home/ubuntu/venkatesh_yolov11/tt-metal/models/experimental/functional_yolov11/dumps/torch_out.pth") x10 = x - # print("input to upsample1 is ", x.shape) x = f.upsample(x, scale_factor=2.0) # 11 x = torch.cat((x, x6), 1) # 12 x = self.model[13](x) # 13 x13 = x - # print("input to upsample2 is ", x.shape) x = f.upsample(x, scale_factor=2.0) # 14 x = torch.cat((x, x4), 1) # 15 x = self.model[16](x) # 16 @@ -941,28 +924,6 @@ def __init__(self, c1, c2, k=1, s=1, d=1, act=True): # ch_in, ch_out, kernel, s super().__init__(c1, c2, k, s, g=math.gcd(c1, c2), d=d, act=act) -# class DFL(nn.Module): -# """ -# Integral module of Distribution Focal Loss (DFL). 
- -# Proposed in Generalized Focal Loss https://ieeexplore.ieee.org/document/9792391 -# """ - -# def __init__(self, c1=16): -# """Initialize a convolutional layer with a given number of input channels.""" -# super().__init__() -# self.conv = nn.Conv2d(c1, 1, 1, bias=False).requires_grad_(False) -# x = torch.arange(c1, dtype=torch.float) -# self.conv.weight.data[:] = nn.Parameter(x.view(1, c1, 1, 1)) -# self.c1 = c1 - -# def forward(self, x): -# """Applies a transformer layer on input tensor 'x' and returns a tensor.""" -# b, _, a = x.shape # batch, channels, anchors -# return self.conv(x.view(b, 4, self.c1, a).transpose(2, 1).softmax(1)).view(b, 4, a) -# # return self.conv(x.view(b, self.c1, 4, a).softmax(1)).view(b, 4, a) - - class BaseModel(nn.Module): def forward(self, x, *args, **kwargs): if isinstance(x, dict): @@ -987,14 +948,61 @@ def __init__(self, cfg="yolov8n.yaml", ch=3, nc=None, verbose=True): super().__init__() -# model = YoloV11() -# model_graph = draw_graph( -# model, -# input_size=(1, 3, 224, 224), -# dtypes=[torch.float32], -# expand_nested=True, -# graph_name="yolov11_ref", -# depth=10, -# directory=".", -# ) -# model_graph.visual_graph.render(format="pdf") +class Ensemble(nn.ModuleList): + def __init__(self): + super(Ensemble, self).__init__() + + def forward(self, x, augment=False): + y = [] + for module in self: + y.append(module(x, augment)[0]) + y = torch.cat(y, 1) + return y, None + + +def attempt_download(file, repo="ultralytics/assets", key="reference"): + tests = Path(__file__).parent.parent / key + file_path = tests / Path(str(file).strip().replace("'", "").lower()) + if not file_path.exists(): + name = "yolo11n.pt" + msg = f"{file_path} missing, try downloading from https://github.com/{repo}/releases/" + + try: + url = f"https://github.com/{repo}/releases/download/v8.3.0/{name}" + + print(f"Downloading {url} to {file_path}...") + torch.hub.download_url_to_file(url, file_path) + assert file_path.exists() and file_path.stat().st_size > 
1e6, f"Download failed for {name}" + + except Exception as e: + print(f"Error downloading from GitHub: {e}. Trying secondary source...") + url = f"https://storage.googleapis.com/{repo}/ckpt/{name}" + print(f"Downloading {url} to {file_path}...") + os.system(f"curl -L {url} -o {file_path}") + if not file_path.exists() or file_path.stat().st_size < 1e6: + file_path.unlink(missing_ok=True) + print(f"ERROR: Download failure for {msg}") + else: + print(f"Download succeeded from secondary source!") + return file_path + + +def attempt_load(weights, map_location=None): + model = Ensemble() + for w in weights if isinstance(weights, list) else [weights]: + weight_path = attempt_download(w) + ckpt = torch.load(weight_path, map_location=map_location) + model.append(ckpt["ema" if ckpt.get("ema") else "model"].float().eval()) + + for m in model.modules(): + if isinstance(m, (nn.Hardswish, nn.LeakyReLU, nn.ReLU, nn.ReLU6, nn.SiLU)): + m.inplace = True + elif isinstance(m, nn.Upsample): + m.recompute_scale_factor = None + + if len(model) == 1: + return model[-1] + else: + for k in ["names", "stride"]: + setattr(model, k, getattr(model[-1], k)) + return model diff --git a/models/experimental/functional_yolov11/test/test_ttnn_attention.py b/models/experimental/functional_yolov11/test/test_ttnn_attention.py deleted file mode 100644 index ed6d3fae468..00000000000 --- a/models/experimental/functional_yolov11/test/test_ttnn_attention.py +++ /dev/null @@ -1,53 +0,0 @@ -# SPDX-FileCopyrightText: © 2025 Tenstorrent Inc. 
-# SPDX-License-Identifier: Apache-2.0 -import pytest -import ttnn -import torch -from tests.ttnn.utils_for_testing import assert_with_pcc -from models.experimental.functional_yolov11.tt.model_preprocessing import ( - create_yolov11_input_tensors, - create_yolov11_model_parameters, -) -from models.experimental.functional_yolov11.reference.yolov11 import Attention as torch_attention -from models.experimental.functional_yolov11.tt.ttnn_yolov11 import Attention as ttnn_attention - - -@pytest.mark.parametrize( - "in_channel, out_channel, kernel, stride, padding, dilation, groups,fwd_input_shape", - [ - ([128, 128, 128], [256, 128, 128], [1, 1, 3], [1, 1, 1], [0, 0, 1], [1, 1, 1], [1, 1, 128], [1, 128, 7, 7]), - ], -) -@pytest.mark.parametrize("device_params", [{"l1_small_size": 79104}], indirect=True) -def test_yolo_v11_attention( - device, - use_program_cache, - reset_seeds, - in_channel, - out_channel, - kernel, - stride, - padding, - dilation, - groups, - fwd_input_shape, -): - torch_module = torch_attention(in_channel, out_channel, kernel, stride, padding, dilation, groups) - torch_module.eval() - torch_input, ttnn_input = create_yolov11_input_tensors( - device, - batch=fwd_input_shape[0], - input_channels=fwd_input_shape[1], - input_height=fwd_input_shape[2], - input_width=fwd_input_shape[3], - ) - ttnn_input = ttnn.to_device(ttnn_input, device=device) - ttnn_input = ttnn.to_layout(ttnn_input, layout=ttnn.TILE_LAYOUT, memory_config=ttnn.L1_MEMORY_CONFIG) - torch_output = torch_module(torch_input) - parameters = create_yolov11_model_parameters(torch_module, torch_input, device=device) - ttnn_module = ttnn_attention(device=device, parameter=parameters.conv_args, conv_pt=parameters) - ttnn_output = ttnn_module(x=ttnn_input, device=device) - ttnn_output = ttnn.to_torch(ttnn_output) - ttnn_output = ttnn_output.permute(0, 3, 1, 2) - ttnn_output = ttnn_output.reshape(torch_output.shape) - assert_with_pcc(torch_output, ttnn_output, 0.99999) diff --git 
a/models/experimental/functional_yolov11/test/test_ttnn_bottleneck.py b/models/experimental/functional_yolov11/test/test_ttnn_bottleneck.py deleted file mode 100644 index 3aee44b8e84..00000000000 --- a/models/experimental/functional_yolov11/test/test_ttnn_bottleneck.py +++ /dev/null @@ -1,59 +0,0 @@ -# SPDX-FileCopyrightText: © 2025 Tenstorrent Inc. -# SPDX-License-Identifier: Apache-2.0 -import pytest -import ttnn -from tests.ttnn.utils_for_testing import assert_with_pcc -from models.experimental.functional_yolov11.tt.model_preprocessing import ( - create_yolov11_input_tensors, - create_yolov11_model_parameters, -) -from models.experimental.functional_yolov11.reference.yolov11 import Bottleneck as torch_bottleneck -from models.experimental.functional_yolov11.tt.ttnn_yolov11 import Bottleneck as ttnn_bottleneck - - -@pytest.mark.parametrize( - "in_channel, out_channel, kernel, stride, padding, dilation, groups,fwd_input_shape", - [ - ([16, 8], [8, 16], [3, 3], [1, 1], [1, 1], [1, 1], [1, 1], [1, 16, 56, 56]), # 1 - ([32, 16], [16, 32], [3, 3], [1, 1], [1, 1], [1, 1], [1, 1], [1, 32, 38, 28]), # 2 - ([32, 32], [32, 32], [3, 3], [1, 1], [1, 1], [1, 1], [1, 1], [1, 32, 14, 14]), # 3 - ([64, 64], [64, 64], [3, 3], [1, 1], [1, 1], [1, 1], [1, 1], [1, 64, 7, 7]), - ([64, 32], [32, 64], [3, 3], [1, 1], [1, 1], [1, 1], [1, 1], [1, 64, 14, 14]), - ([32, 16], [16, 32], [3, 3], [1, 1], [1, 1], [1, 1], [1, 1], [1, 32, 28, 28]), - ([64, 32], [32, 64], [3, 3], [1, 1], [1, 1], [1, 1], [1, 1], [1, 64, 14, 14]), - ([64, 64], [64, 64], [3, 3], [1, 1], [1, 1], [1, 1], [1, 1], [1, 64, 7, 7]), - ], -) -@pytest.mark.parametrize("device_params", [{"l1_small_size": 79104}], indirect=True) -def test_yolo_v11_bottleneck( - device, - use_program_cache, - reset_seeds, - in_channel, - out_channel, - kernel, - stride, - padding, - dilation, - groups, - fwd_input_shape, -): - torch_module = torch_bottleneck(in_channel, out_channel, kernel, stride, padding, dilation, groups) - torch_module.eval() 
- torch_input, ttnn_input = create_yolov11_input_tensors( - device, - batch=fwd_input_shape[0], - input_channels=fwd_input_shape[1], - input_height=fwd_input_shape[2], - input_width=fwd_input_shape[3], - ) - ttnn_input = ttnn.to_device(ttnn_input, device=device) - ttnn_input = ttnn.to_layout(ttnn_input, layout=ttnn.TILE_LAYOUT, memory_config=ttnn.L1_MEMORY_CONFIG) - torch_output = torch_module(torch_input) - parameters = create_yolov11_model_parameters(torch_module, torch_input, device=device) - ttnn_module = ttnn_bottleneck(device=device, parameter=parameters.conv_args, conv_pt=parameters) - ttnn_output = ttnn_module(x=ttnn_input, device=device) - ttnn_output = ttnn.to_torch(ttnn_output) - ttnn_output = ttnn_output.permute(0, 3, 1, 2) - ttnn_output = ttnn_output.reshape(torch_output.shape) - assert_with_pcc(torch_output, ttnn_output, 0.99999) diff --git a/models/experimental/functional_yolov11/test/test_ttnn_c2psa.py b/models/experimental/functional_yolov11/test/test_ttnn_c2psa.py deleted file mode 100644 index 25b6bbe79f5..00000000000 --- a/models/experimental/functional_yolov11/test/test_ttnn_c2psa.py +++ /dev/null @@ -1,62 +0,0 @@ -# SPDX-FileCopyrightText: © 2025 Tenstorrent Inc. 
-# SPDX-License-Identifier: Apache-2.0 -import pytest -import ttnn -import torch -from tests.ttnn.utils_for_testing import assert_with_pcc -from models.experimental.functional_yolov11.tt.model_preprocessing import ( - create_yolov11_input_tensors, - create_yolov11_model_parameters, -) -from models.experimental.functional_yolov11.reference.yolov11 import C2PSA as torch_c2psa_block -from models.experimental.functional_yolov11.tt.ttnn_yolov11 import C2PSA as ttnn_c2psa_block - - -@pytest.mark.parametrize( - "in_channel, out_channel, kernel, stride, padding, dilation, groups,fwd_input_shape", - [ - ( - [256, 256, 128, 128, 128, 128, 256], - [256, 256, 256, 128, 128, 256, 128], - [1, 1, 1, 1, 3, 1, 1], - [1, 1, 1, 1, 1, 1, 1], - [0, 0, 0, 0, 1, 0, 0], - [1, 1, 1, 1, 1, 1, 1], - [1, 1, 1, 1, 128, 1, 1], - [1, 256, 7, 7], - ), - ], -) -@pytest.mark.parametrize("device_params", [{"l1_small_size": 79104}], indirect=True) -def test_yolo_v11_c2psa_block( - device, - use_program_cache, - reset_seeds, - in_channel, - out_channel, - kernel, - stride, - padding, - dilation, - groups, - fwd_input_shape, -): - torch_module = torch_c2psa_block(in_channel, out_channel, kernel, stride, padding, dilation, groups) - torch_module.eval() - torch_input, ttnn_input = create_yolov11_input_tensors( - device, - batch=fwd_input_shape[0], - input_channels=fwd_input_shape[1], - input_height=fwd_input_shape[2], - input_width=fwd_input_shape[3], - ) - ttnn_input = ttnn.to_device(ttnn_input, device=device) - ttnn_input = ttnn.to_layout(ttnn_input, layout=ttnn.TILE_LAYOUT, memory_config=ttnn.L1_MEMORY_CONFIG) - torch_output = torch_module(torch_input) - parameters = create_yolov11_model_parameters(torch_module, torch_input, device=device) - ttnn_module = ttnn_c2psa_block(device=device, parameter=parameters.conv_args, conv_pt=parameters) - ttnn_output = ttnn_module(x=ttnn_input, device=device) - ttnn_output = ttnn.to_torch(ttnn_output) - ttnn_output = ttnn_output.permute(0, 3, 1, 2) - ttnn_output = 
ttnn_output.reshape(torch_output.shape) - assert_with_pcc(torch_output, ttnn_output, 0.99999) diff --git a/models/experimental/functional_yolov11/test/test_ttnn_c3k.py b/models/experimental/functional_yolov11/test/test_ttnn_c3k.py deleted file mode 100644 index 13aade02c0e..00000000000 --- a/models/experimental/functional_yolov11/test/test_ttnn_c3k.py +++ /dev/null @@ -1,93 +0,0 @@ -# SPDX-FileCopyrightText: © 2025 Tenstorrent Inc. -# SPDX-License-Identifier: Apache-2.0 - -import pytest -import ttnn -import torch -from tests.ttnn.utils_for_testing import assert_with_pcc -from models.experimental.functional_yolov11.tt.model_preprocessing import ( - create_yolov11_input_tensors, - create_yolov11_model_parameters, -) -from models.experimental.functional_yolov11.reference.yolov11 import C3k as torch_c3k -from models.experimental.functional_yolov11.tt.ttnn_yolov11 import C3K as ttnn_c3k - - -@pytest.mark.parametrize( - "in_channel, out_channel, kernel, stride, padding, dilation, groups,fwd_input_shape", - [ - ( - [64, 64, 64, 32, 32, 32, 32], - [32, 32, 64, 32, 32, 32, 32], - [1, 1, 1, 3, 3, 3, 3], - [1, 1, 1, 1, 1, 1, 1], - [0, 0, 0, 1, 1, 1, 1], - [1, 1, 1, 1, 1, 1, 1], - [1, 1, 1, 1, 1, 1, 1], - [1, 64, 14, 14], - ), - ( - [128, 128, 128, 64, 64, 64, 64], - [64, 64, 128, 64, 64, 64, 64], - [1, 1, 1, 3, 3, 3, 3], - [1, 1, 1, 1, 1, 1, 1], - [0, 0, 0, 1, 1, 1, 1], - [1, 1, 1, 1, 1, 1, 1], - [1, 1, 1, 1, 1, 1, 1], - [1, 128, 7, 7], - ), - ( - [64, 64, 64, 32, 32, 32, 32], - [32, 32, 64, 32, 32, 32, 32], - [1, 1, 1, 3, 3, 3, 3], - [1, 1, 1, 1, 1, 1, 1], - [0, 0, 0, 1, 1, 1, 1], - [1, 1, 1, 1, 1, 1, 1], - [1, 1, 1, 1, 1, 1, 1], - [1, 64, 40, 40], - ), - ( - [128, 128, 128, 64, 64, 64, 64], - [64, 64, 128, 64, 64, 64, 64], - [1, 1, 1, 3, 3, 3, 3], - [1, 1, 1, 1, 1, 1, 1], - [0, 0, 0, 1, 1, 1, 1], - [1, 1, 1, 1, 1, 1, 1], - [1, 1, 1, 1, 1, 1, 1], - [1, 128, 20, 20], - ), - ], -) -@pytest.mark.parametrize("device_params", [{"l1_small_size": 79104}], indirect=True) -def 
test_yolo_v11_c3k( - device, - use_program_cache, - reset_seeds, - in_channel, - out_channel, - kernel, - stride, - padding, - dilation, - groups, - fwd_input_shape, -): - torch_module = torch_c3k(in_channel, out_channel, kernel, stride, padding, dilation, groups) - torch_module.eval() - torch_input, ttnn_input = create_yolov11_input_tensors( - device, - batch=fwd_input_shape[0], - input_channels=fwd_input_shape[1], - input_height=fwd_input_shape[2], - input_width=fwd_input_shape[3], - ) - ttnn_input = ttnn.to_device(ttnn_input, device=device) - ttnn_input = ttnn.to_layout(ttnn_input, layout=ttnn.TILE_LAYOUT, memory_config=ttnn.L1_MEMORY_CONFIG) - torch_output = torch_module(torch_input) - parameters = create_yolov11_model_parameters(torch_module, torch_input, device=device) - ttnn_module = ttnn_c3k(device=device, parameter=parameters.conv_args, conv_pt=parameters) - ttnn_output = ttnn_module(x=ttnn_input, device=device) # ttnn.Shape([1, 1, 224, 64]) - ttnn_output = ttnn.to_torch(ttnn_output) - ttnn_output = ttnn_output.permute(0, 3, 1, 2) - ttnn_output = ttnn_output.reshape(torch_output.shape) - assert_with_pcc(torch_output, ttnn_output, 0.99999) diff --git a/models/experimental/functional_yolov11/test/test_ttnn_c3k2.py b/models/experimental/functional_yolov11/test/test_ttnn_c3k2.py deleted file mode 100644 index 0c75d152c17..00000000000 --- a/models/experimental/functional_yolov11/test/test_ttnn_c3k2.py +++ /dev/null @@ -1,234 +0,0 @@ -# SPDX-FileCopyrightText: © 2025 Tenstorrent Inc. 
-# SPDX-License-Identifier: Apache-2.0 - -import pytest -import ttnn -import torch -from tests.ttnn.utils_for_testing import assert_with_pcc -from models.experimental.functional_yolov11.tt.model_preprocessing import ( - create_yolov11_input_tensors, - create_yolov11_model_parameters, -) -from models.experimental.functional_yolov11.reference.yolov11 import C3k2 as torch_c3k2 -from models.experimental.functional_yolov11.tt.ttnn_yolov11 import C3k2 as ttnn_c3k2 - - -@pytest.mark.parametrize( - "in_channel, out_channel, kernel, stride, padding, dilation, groups,is_bk_enabled,fwd_input_shape", - [ - # 224 - # ( - # [32, 48, 16, 8], - # [32, 64, 8, 16], - # [1, 1, 3, 3], - # [1, 1, 1, 1], - # [0, 0, 1, 1], - # [1, 1, 1, 1], - # [1, 1, 1, 1], - # True, - # [1, 32, 56, 56], - # ), - # ( - # [64, 96, 32, 16], - # [64, 128, 16, 32], - # [1, 1, 3, 3], - # [1, 1, 1, 1], - # [0, 0, 1, 1], - # [1, 1, 1, 1], - # [1, 1, 1, 1], - # True, - # [1, 64, 28, 28], - # ), - # ( - # [128, 192, 64, 64, 64, 32, 32, 32, 32], - # [128, 128, 32, 32, 64, 32, 32, 32, 32], - # [1, 1, 1, 1, 1, 3, 3, 3, 3], - # [1, 1, 1, 1, 1, 1, 1, 1, 1], - # [0, 0, 0, 0, 0, 1, 1, 1, 1], - # [1, 1, 1, 1, 1, 1, 1, 1, 1], - # [1, 1, 1, 1, 1, 1, 1, 1, 1], - # False, - # [1, 128, 14, 14], - # ), - # ( - # [256, 384, 128, 128, 128, 64, 64, 64, 64], - # [256, 256, 64, 64, 128, 64, 64, 64, 64], - # [1, 1, 1, 1, 1, 3, 3, 3, 3], - # [1, 1, 1, 1, 1, 1, 1, 1, 1], - # [0, 0, 0, 0, 0, 1, 1, 1, 1], - # [1, 1, 1, 1, 1, 1, 1, 1, 1], - # [1, 1, 1, 1, 1, 1, 1, 1, 1], - # False, - # [1, 256, 7, 7], - # ), - # ( - # [384, 192, 64, 32], - # [128, 128, 32, 64], - # [1, 1, 3, 3], - # [1, 1, 1, 1], - # [0, 0, 1, 1], - # [1, 1, 1, 1], - # [1, 1, 1, 1], - # True, - # [1, 384, 14, 14], - # ), - # ( - # [256, 96, 32, 16], - # [64, 64, 16, 32], - # [1, 1, 3, 3], - # [1, 1, 1, 1], - # [0, 0, 1, 1], - # [1, 1, 1, 1], - # [1, 1, 1, 1], - # True, - # [1, 256, 28, 28], - # ), - # ( - # [192, 192, 64, 32], - # [128, 128, 32, 64], - # [1, 1, 3, 3], - 
# [1, 1, 1, 1], - # [0, 0, 1, 1], - # [1, 1, 1, 1], - # [1, 1, 1, 1], - # True, - # [1, 192, 14, 14], - # ), - # ( - # [384, 384, 128, 128, 128, 64, 64, 64, 64], - # [256, 256, 64, 64, 128, 64, 64, 64, 64], - # [1, 1, 1, 1, 1, 3, 3, 3, 3], - # [1, 1, 1, 1, 1, 1, 1, 1, 1], - # [0, 0, 0, 0, 0, 1, 1, 1, 1], - # [1, 1, 1, 1, 1, 1, 1, 1, 1], - # [1, 1, 1, 1, 1, 1, 1, 1, 1], - # False, - # [1, 384, 7, 7], - # ), - # #640 - ( - [32, 48, 16, 8], - [32, 64, 8, 16], - [1, 1, 3, 3], - [1, 1, 1, 1], - [0, 0, 1, 1], - [1, 1, 1, 1], - [1, 1, 1, 1], - True, - [1, 32, 160, 160], - ), - ( - [64, 96, 32, 16], - [64, 128, 16, 32], - [1, 1, 3, 3], - [1, 1, 1, 1], - [0, 0, 1, 1], - [1, 1, 1, 1], - [1, 1, 1, 1], - True, - [1, 64, 80, 80], - ), - ( - [128, 192, 64, 64, 64, 32, 32, 32, 32], - [128, 128, 32, 32, 64, 32, 32, 32, 32], - [1, 1, 1, 1, 1, 3, 3, 3, 3], - [1, 1, 1, 1, 1, 1, 1, 1, 1], - [0, 0, 0, 0, 0, 1, 1, 1, 1], - [1, 1, 1, 1, 1, 1, 1, 1, 1], - [1, 1, 1, 1, 1, 1, 1, 1, 1], - False, - [1, 128, 40, 40], - ), - ( - [256, 384, 128, 128, 128, 64, 64, 64, 64], - [256, 256, 64, 64, 128, 64, 64, 64, 64], - [1, 1, 1, 1, 1, 3, 3, 3, 3], - [1, 1, 1, 1, 1, 1, 1, 1, 1], - [0, 0, 0, 0, 0, 1, 1, 1, 1], - [1, 1, 1, 1, 1, 1, 1, 1, 1], - [1, 1, 1, 1, 1, 1, 1, 1, 1], - False, - [1, 256, 20, 20], - ), - ( - [384, 192, 64, 32], - [128, 128, 32, 64], - [1, 1, 3, 3], - [1, 1, 1, 1], - [0, 0, 1, 1], - [1, 1, 1, 1], - [1, 1, 1, 1], - True, - [1, 384, 40, 40], - ), - ( - [256, 96, 32, 16], - [64, 64, 16, 32], - [1, 1, 3, 3], - [1, 1, 1, 1], - [0, 0, 1, 1], - [1, 1, 1, 1], - [1, 1, 1, 1], - True, - [1, 256, 80, 80], - ), - ( - [192, 192, 64, 32], - [128, 128, 32, 64], - [1, 1, 3, 3], - [1, 1, 1, 1], - [0, 0, 1, 1], - [1, 1, 1, 1], - [1, 1, 1, 1], - True, - [1, 192, 40, 40], - ), - ( - [384, 384, 128, 128, 128, 64, 64, 64, 64], - [256, 256, 64, 64, 128, 64, 64, 64, 64], - [1, 1, 1, 1, 1, 3, 3, 3, 3], - [1, 1, 1, 1, 1, 1, 1, 1, 1], - [0, 0, 0, 0, 0, 1, 1, 1, 1], - [1, 1, 1, 1, 1, 1, 1, 1, 1], - [1, 1, 1, 
1, 1, 1, 1, 1, 1], - False, - [1, 384, 20, 20], - ), - ], -) -@pytest.mark.parametrize("device_params", [{"l1_small_size": 79104}], indirect=True) -def test_yolo_v11_c3k2( - device, - use_program_cache, - reset_seeds, - in_channel, - out_channel, - kernel, - stride, - padding, - dilation, - groups, - is_bk_enabled, - fwd_input_shape, -): - torch_module = torch_c3k2(in_channel, out_channel, kernel, stride, padding, dilation, groups, is_bk_enabled) - torch_module.eval() - torch_input, ttnn_input = create_yolov11_input_tensors( - device, - batch=fwd_input_shape[0], - input_channels=fwd_input_shape[1], - input_height=fwd_input_shape[2], - input_width=fwd_input_shape[3], - ) - ttnn_input = ttnn.to_device(ttnn_input, device=device, memory_config=ttnn.L1_MEMORY_CONFIG) - # ttnn_input = ttnn.to_layout(ttnn_input, layout=ttnn.TILE_LAYOUT, ) - torch_output = torch_module(torch_input) - parameters = create_yolov11_model_parameters(torch_module, torch_input, device=device) - ttnn_module = ttnn_c3k2( - device=device, parameter=parameters.conv_args, conv_pt=parameters, is_bk_enabled=is_bk_enabled - ) - ttnn_output = ttnn_module(x=ttnn_input, device=device) - ttnn_output = ttnn.to_torch(ttnn_output) - ttnn_output = ttnn_output.permute(0, 3, 1, 2) - ttnn_output = ttnn_output.reshape(torch_output.shape) - assert_with_pcc(torch_output, ttnn_output, 0.99999) diff --git a/models/experimental/functional_yolov11/test/test_ttnn_detect.py b/models/experimental/functional_yolov11/test/test_ttnn_detect.py deleted file mode 100644 index 2108bf23eb2..00000000000 --- a/models/experimental/functional_yolov11/test/test_ttnn_detect.py +++ /dev/null @@ -1,86 +0,0 @@ -# SPDX-FileCopyrightText: © 2024 Tenstorrent Inc. 
-# SPDX-License-Identifier: Apache-2.0 -import pytest -import ttnn -import torch -from tests.ttnn.utils_for_testing import assert_with_pcc -from models.experimental.functional_yolov11.tt.model_preprocessing import ( - create_yolov11_input_tensors, - create_yolov11_model_parameters_detect, - create_yolov11_model_parameters, -) -from models.experimental.functional_yolov11.reference.yolov11 import Detect as torch_detect -from models.experimental.functional_yolov11.tt.ttnn_yolov11 import Detect as ttnn_detect -from ttnn.model_preprocessing import preprocess_model_parameters -import math - - -@pytest.mark.parametrize( - "in_channel, out_channel, kernel, stride, padding, dilation, groups,fwd_input_shape", - [ - ( - [64, 64, 64, 128, 64, 64, 256, 64, 64, 64, 64, 80, 80, 80, 128, 128, 80, 80, 80, 256, 256, 80, 80, 80, 16], - [64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 80, 80, 80, 80, 128, 80, 80, 80, 80, 256, 80, 80, 80, 80, 1], - [3, 3, 1, 3, 3, 1, 3, 3, 1, 3, 1, 3, 1, 1, 3, 1, 3, 1, 1, 3, 1, 3, 1, 1, 1], - [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], - [1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 0, 1, 0, 0, 1, 0, 1, 0, 0, 1, 0, 1, 0, 0, 0], - [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], - [1, 1, 1, 1, 1, 1, 1, 1, 1, 64, 1, 80, 1, 1, 128, 1, 80, 1, 1, 256, 1, 80, 1, 1, 1], - [[1, 64, 28, 28], [1, 128, 14, 14], [1, 256, 7, 7]], - ), - ], -) -@pytest.mark.parametrize("device_params", [{"l1_small_size": 79104}], indirect=True) -def test_yolo_v11_detect( - device, - use_program_cache, - reset_seeds, - in_channel, - out_channel, - kernel, - stride, - padding, - dilation, - groups, - fwd_input_shape, -): - torch_module = torch_detect(in_channel, out_channel, kernel, stride, padding, dilation, groups) - torch_module.eval() - torch_input_1, ttnn_input_1 = create_yolov11_input_tensors( - device, - batch=fwd_input_shape[0][0], - input_channels=fwd_input_shape[0][1], - input_height=fwd_input_shape[0][2], - 
input_width=fwd_input_shape[0][3], - ) - torch_input_2, ttnn_input_2 = create_yolov11_input_tensors( - device, - batch=fwd_input_shape[1][0], - input_channels=fwd_input_shape[1][1], - input_height=fwd_input_shape[1][2], - input_width=fwd_input_shape[1][3], - ) - torch_input_3, ttnn_input_3 = create_yolov11_input_tensors( - device, - batch=fwd_input_shape[2][0], - input_channels=fwd_input_shape[2][1], - input_height=fwd_input_shape[2][2], - input_width=fwd_input_shape[2][3], - ) - ttnn_input_1 = ttnn.to_device(ttnn_input_1, device=device) - ttnn_input_1 = ttnn.to_layout(ttnn_input_1, layout=ttnn.TILE_LAYOUT, memory_config=ttnn.L1_MEMORY_CONFIG) - ttnn_input_2 = ttnn.to_device(ttnn_input_2, device=device) - ttnn_input_2 = ttnn.to_layout(ttnn_input_2, layout=ttnn.TILE_LAYOUT, memory_config=ttnn.L1_MEMORY_CONFIG) - ttnn_input_3 = ttnn.to_device(ttnn_input_3, device=device) - ttnn_input_3 = ttnn.to_layout(ttnn_input_3, layout=ttnn.TILE_LAYOUT, memory_config=ttnn.L1_MEMORY_CONFIG) - torch_output = torch_module(torch_input_1, torch_input_2, torch_input_3) - parameters = create_yolov11_model_parameters_detect( - torch_module, torch_input_1, torch_input_2, torch_input_3, device=device - ) - ttnn_module = ttnn_detect(device=device, parameter=parameters.model, conv_pt=parameters) - - ttnn_output = ttnn_module(y1=ttnn_input_1, y2=ttnn_input_2, y3=ttnn_input_3, device=device) - ttnn_output = ttnn.to_torch(ttnn_output) - # ttnn_output = ttnn_output.permute(0, 2, 1) - ttnn_output = ttnn_output.reshape(torch_output.shape) - assert_with_pcc(torch_output, ttnn_output, 0.99999) diff --git a/models/experimental/functional_yolov11/test/test_ttnn_dtype_issue.py b/models/experimental/functional_yolov11/test/test_ttnn_dtype_issue.py deleted file mode 100644 index 256d7cee684..00000000000 --- a/models/experimental/functional_yolov11/test/test_ttnn_dtype_issue.py +++ /dev/null @@ -1,18 +0,0 @@ -# SPDX-FileCopyrightText: © 2025 Tenstorrent Inc. 
- -# SPDX-License-Identifier: Apache-2.0 - - -import pytest -import ttnn -import torch -from tests.ttnn.utils_for_testing import assert_with_pcc - - -@pytest.mark.parametrize("device_params", [{"l1_small_size": 16384}], indirect=True) -def test_dtype_issue(device): - a = torch.randn((1, 256, 1, 49), dtype=torch.bfloat16) - a_ttnn = ttnn.from_torch(a, device=device, layout=ttnn.TILE_LAYOUT, dtype=ttnn.bfloat8_b) - a_ttnn = ttnn.to_dtype(a_ttnn, ttnn.bfloat16) - ttnn_output = ttnn.to_torch(a_ttnn) - assert_with_pcc(a, ttnn_output, 0.99999) diff --git a/models/experimental/functional_yolov11/test/test_ttnn_psa_block.py b/models/experimental/functional_yolov11/test/test_ttnn_psa_block.py deleted file mode 100644 index c2643318abf..00000000000 --- a/models/experimental/functional_yolov11/test/test_ttnn_psa_block.py +++ /dev/null @@ -1,62 +0,0 @@ -# SPDX-FileCopyrightText: © 2025 Tenstorrent Inc. -# SPDX-License-Identifier: Apache-2.0 -import pytest -import ttnn -import torch -from tests.ttnn.utils_for_testing import assert_with_pcc -from models.experimental.functional_yolov11.tt.model_preprocessing import ( - create_yolov11_input_tensors, - create_yolov11_model_parameters, -) -from models.experimental.functional_yolov11.reference.yolov11 import PSABlock as torch_psa_block -from models.experimental.functional_yolov11.tt.ttnn_yolov11 import PSABlock as ttnn_psa_block - - -@pytest.mark.parametrize( - "in_channel, out_channel, kernel, stride, padding, dilation, groups,fwd_input_shape", - [ - ( - [128, 128, 128, 128, 256], - [256, 128, 128, 256, 128], - [1, 1, 3, 1, 1], - [1, 1, 1, 1, 1], - [0, 0, 1, 0, 0], - [1, 1, 1, 1, 1], - [1, 1, 128, 1, 1], - [1, 128, 7, 7], - ), - ], -) -@pytest.mark.parametrize("device_params", [{"l1_small_size": 79104}], indirect=True) -def test_yolo_v11_psa_block( - device, - use_program_cache, - reset_seeds, - in_channel, - out_channel, - kernel, - stride, - padding, - dilation, - groups, - fwd_input_shape, -): - torch_module = 
torch_psa_block(in_channel, out_channel, kernel, stride, padding, dilation, groups) - torch_module.eval() - torch_input, ttnn_input = create_yolov11_input_tensors( - device, - batch=fwd_input_shape[0], - input_channels=fwd_input_shape[1], - input_height=fwd_input_shape[2], - input_width=fwd_input_shape[3], - ) - ttnn_input = ttnn.to_device(ttnn_input, device=device) - ttnn_input = ttnn.to_layout(ttnn_input, layout=ttnn.TILE_LAYOUT, memory_config=ttnn.L1_MEMORY_CONFIG) - torch_output = torch_module(torch_input) - parameters = create_yolov11_model_parameters(torch_module, torch_input, device=device) - ttnn_module = ttnn_psa_block(device=device, parameter=parameters.conv_args, conv_pt=parameters) - ttnn_output = ttnn_module(x=ttnn_input, device=device) - ttnn_output = ttnn.to_torch(ttnn_output) - ttnn_output = ttnn_output.permute(0, 3, 1, 2) - ttnn_output = ttnn_output.reshape(torch_output.shape) - assert_with_pcc(torch_output, ttnn_output, 0.99999) diff --git a/models/experimental/functional_yolov11/test/test_ttnn_sppf.py b/models/experimental/functional_yolov11/test/test_ttnn_sppf.py deleted file mode 100644 index 8e8f6310df0..00000000000 --- a/models/experimental/functional_yolov11/test/test_ttnn_sppf.py +++ /dev/null @@ -1,54 +0,0 @@ -# SPDX-FileCopyrightText: © 2025 Tenstorrent Inc. 
-# SPDX-License-Identifier: Apache-2.0 - -import pytest -import ttnn -import torch -from tests.ttnn.utils_for_testing import assert_with_pcc -from models.experimental.functional_yolov11.tt.model_preprocessing import ( - create_yolov11_input_tensors, - create_yolov11_model_parameters, -) -from models.experimental.functional_yolov11.reference.yolov11 import SPPF as torch_sppf -from models.experimental.functional_yolov11.tt.ttnn_yolov11 import SPPF as ttnn_sppf - - -@pytest.mark.parametrize( - "in_channel, out_channel, kernel, stride, padding, dilation, groups,fwd_input_shape", - [ - ([256, 512], [128, 256], [1, 1], [1, 1], [0, 0], [1, 1], [1, 1], [1, 256, 20, 20]), - ], -) -@pytest.mark.parametrize("device_params", [{"l1_small_size": 79104}], indirect=True) -def test_yolo_v11_sppf( - device, - use_program_cache, - reset_seeds, - in_channel, - out_channel, - kernel, - stride, - padding, - dilation, - groups, - fwd_input_shape, -): - torch_module = torch_sppf(in_channel, out_channel, kernel, stride, padding, dilation, groups) - torch_module.eval() - torch_input, ttnn_input = create_yolov11_input_tensors( - device, - batch=fwd_input_shape[0], - input_channels=fwd_input_shape[1], - input_height=fwd_input_shape[2], - input_width=fwd_input_shape[3], - ) - ttnn_input = ttnn.to_device(ttnn_input, device=device) - ttnn_input = ttnn.to_layout(ttnn_input, layout=ttnn.TILE_LAYOUT, memory_config=ttnn.L1_MEMORY_CONFIG) - torch_output = torch_module(torch_input) - parameters = create_yolov11_model_parameters(torch_module, torch_input, device=device) - ttnn_module = ttnn_sppf(device=device, parameter=parameters.conv_args, conv_pt=parameters) - ttnn_output = ttnn_module(x=ttnn_input, device=device) - ttnn_output = ttnn.to_torch(ttnn_output) - ttnn_output = ttnn_output.permute(0, 3, 1, 2) - ttnn_output = ttnn_output.reshape(torch_output.shape) - assert_with_pcc(torch_output, ttnn_output, 0.99999) diff --git a/models/experimental/functional_yolov11/test/test_ttnn_yolov11.py 
b/models/experimental/functional_yolov11/test/test_ttnn_yolov11.py deleted file mode 100644 index f808f36236a..00000000000 --- a/models/experimental/functional_yolov11/test/test_ttnn_yolov11.py +++ /dev/null @@ -1,85 +0,0 @@ -# SPDX-FileCopyrightText: © 2025 Tenstorrent Inc. - -# SPDX-License-Identifier: Apache-2.0 - -import pytest -import ttnn -import torch -from tests.ttnn.utils_for_testing import assert_with_pcc -import sys - -from models.experimental.functional_yolov11.reference import yolov11 - -from models.experimental.functional_yolov11.tt.model_preprocessing import ( - create_yolov11_input_tensors, - create_yolov11_model_parameters, -) -from models.experimental.functional_yolov11.tt import ttnn_yolov11 -import torch.nn as nn - -try: - sys.modules["ultralytics"] = yolov11 - sys.modules["ultralytics.nn.tasks"] = yolov11 - sys.modules["ultralytics.nn.modules.conv"] = yolov11 - sys.modules["ultralytics.nn.modules.block"] = yolov11 - sys.modules["ultralytics.nn.modules.head"] = yolov11 - -except KeyError: - print("models.experimental.functional_yolov11.reference.yolov11 not found.") - - -class Ensemble(nn.ModuleList): - def __init__(self): - super(Ensemble, self).__init__() - - def forward(self, x, augment=False): - y = [] - for module in self: - y.append(module(x, augment)[0]) - y = torch.cat(y, 1) - return y, None - - -def attempt_load(weights, map_location=None): - model = Ensemble() - for w in weights if isinstance(weights, list) else [weights]: - w = "models/experimental/functional_yolov11/reference/yolo11n.pt" - ckpt = torch.load(w, map_location=map_location) - model.append(ckpt["ema" if ckpt.get("ema") else "model"].float().eval()) - for m in model.modules(): - if type(m) in [nn.Hardswish, nn.LeakyReLU, nn.ReLU, nn.ReLU6, nn.SiLU]: - m.inplace = True - elif type(m) is nn.Upsample: - m.recompute_scale_factor = None - - if len(model) == 1: - return model[-1] - else: - for k in ["names", "stride"]: - setattr(model, k, getattr(model[-1], k)) - return model - 
- -@pytest.mark.parametrize("device_params", [{"l1_small_size": 79104}], indirect=True) -def test_yolov11(device, use_program_cache, reset_seeds): - torch_input, ttnn_input = create_yolov11_input_tensors(device, input_channels=3, input_height=224, input_width=224) - - torch_model = attempt_load("yolov11n.pt", map_location="cpu") - state_dict = torch_model.state_dict() - torch_model = yolov11.YoloV11() - ds_state_dict = {k: v for k, v in state_dict.items()} - new_state_dict = {} - for (name1, parameter1), (name2, parameter2) in zip(torch_model.state_dict().items(), ds_state_dict.items()): - if isinstance(parameter2, torch.FloatTensor): - new_state_dict[name1] = parameter2 - torch_model.load_state_dict(new_state_dict) - torch_model.eval() - - torch_output = torch_model(torch_input) - parameters = create_yolov11_model_parameters(torch_model, torch_input, device=device) - ttnn_model = ttnn_yolov11.YoloV11(device, parameters) - ttnn_output = ttnn_model(ttnn_input) - ttnn_output = ttnn.to_torch(ttnn_output) - - ttnn_output = ttnn_output.reshape(torch_output.shape) - assert_with_pcc(torch_output, ttnn_output, 0.99999) diff --git a/models/experimental/functional_yolov11/test/test_work_arounds_work.py b/models/experimental/functional_yolov11/test/test_work_arounds_work.py deleted file mode 100644 index 1e8abd03813..00000000000 --- a/models/experimental/functional_yolov11/test/test_work_arounds_work.py +++ /dev/null @@ -1,110 +0,0 @@ -import torch, ttnn, pytest, torch.nn as nn -from tests.ttnn.utils_for_testing import assert_with_pcc -import ttnn - - -@pytest.mark.parametrize("device_params", [{"l1_small_size": 79104}], indirect=True) -def test_silu_alone(device, use_program_cache, reset_seeds): - torch_input_tensor = torch.randn(1, 32, 56, 56) - act = nn.SiLU(inplace=True) - torch_x = act(torch_input_tensor) - - ttnn_input_tensor = torch.permute(torch_input_tensor, (0, 2, 3, 1)) - ttnn_input_tensor = ttnn_input_tensor.reshape( - 1, - 1, - ttnn_input_tensor.shape[0] * 
ttnn_input_tensor.shape[1] * ttnn_input_tensor.shape[2], - ttnn_input_tensor.shape[3], - ) - ttnn_x = ttnn.from_torch( - ttnn_input_tensor, - dtype=ttnn.bfloat16, - layout=ttnn.TILE_LAYOUT, - device=device, - memory_config=ttnn.L1_MEMORY_CONFIG, - ) - ttnn_x = ttnn.silu(ttnn_x) - ttnn_x = ttnn.to_torch(ttnn_x).reshape(1, 56, 56, 32).permute(0, 3, 1, 2) - - assert_with_pcc(torch_x, ttnn_x, 0.99999) - - -@pytest.mark.parametrize("device_params", [{"l1_small_size": 79104}], indirect=True) -def test_slice_alone(device, use_program_cache, reset_seeds): - torch_input_tensor = torch.randn(1, 64, 28, 28) - torch_y1, torch_y2 = torch_input_tensor.chunk(2, 1) - ttnn_input_tensor = torch.permute(torch_input_tensor, (0, 2, 3, 1)) - ttnn_input_tensor = ttnn_input_tensor.reshape( - 1, - 1, - ttnn_input_tensor.shape[0] * ttnn_input_tensor.shape[1] * ttnn_input_tensor.shape[2], - ttnn_input_tensor.shape[3], - ) - ttnn_x = ttnn.from_torch(ttnn_input_tensor, dtype=ttnn.bfloat16, layout=ttnn.TILE_LAYOUT, device=device) - ttnn_x = ttnn.to_layout(ttnn_x, ttnn.ROW_MAJOR_LAYOUT) - ttnn_y1 = ttnn_x[:, :, :, :32] - ttnn_y2 = ttnn_x[:, :, :, 32:64] - ttnn_y1 = ttnn.to_torch(ttnn_y1).reshape(1, 28, 28, 32).permute(0, 3, 1, 2) - - assert_with_pcc(torch_y1, ttnn_y1, 0.99999) - - -@pytest.mark.parametrize("device_params", [{"l1_small_size": 79104}], indirect=True) -def test_silu_layout_issue(device, use_program_cache, reset_seeds): - torch_input_tensor = torch.randn(1, 64, 28, 28) - act = nn.SiLU(inplace=True) - torch_x = act(torch_input_tensor) - torch_y1, torch_y2 = torch_input_tensor.chunk(2, 1) - - ttnn_input_tensor = torch.permute(torch_input_tensor, (0, 2, 3, 1)) - - ttnn_input_tensor = ttnn_input_tensor.reshape( - 1, - 1, - ttnn_input_tensor.shape[0] * ttnn_input_tensor.shape[1] * ttnn_input_tensor.shape[2], - ttnn_input_tensor.shape[3], - ) - ttnn_x = ttnn.from_torch(ttnn_input_tensor, dtype=ttnn.bfloat16, layout=ttnn.TILE_LAYOUT, device=device) - ttnn_x = ttnn.silu(ttnn_x) - ttnn_x = 
ttnn.to_layout(ttnn_x, ttnn.ROW_MAJOR_LAYOUT) - ttnn_y1 = ttnn_x[:, :, :, :32] - ttnn_y2 = ttnn_x[:, :, :, 32:64] - # ttnn_x = ttnn.to_layout(ttnn_x, ttnn.ROW_MAJOR_LAYOUT) - # ttnn_x = ttnn.reshape(ttnn_x, (1, 28, 28, 64)) - - # ttnn_y1, ttnn_y2 = ttnn.split(ttnn_x, 2, 3) - ttnn_y1 = ttnn.to_torch(ttnn_y1).reshape(1, 28, 28, 32).permute(0, 3, 1, 2) - - assert_with_pcc(torch_y1, ttnn_y1, 0.99999) - - -@pytest.mark.parametrize("device_params", [{"l1_small_size": 79104}], indirect=True) -def test_split_dumped(device, use_program_cache, reset_seeds): # 0.005 for fp8 and 099 for fp16 - torch_input_tensor = torch.randn(1, 64, 28, 28) - torch_y1, torch_y2 = torch_input_tensor.chunk(2, 1) - - ttnn_input_tensor = torch.permute(torch_input_tensor, (0, 2, 3, 1)) - ttnn_input_tensor = ttnn_input_tensor.reshape( - 1, - 1, - ttnn_input_tensor.shape[0] * ttnn_input_tensor.shape[1] * ttnn_input_tensor.shape[2], - ttnn_input_tensor.shape[3], - ) - ttnn_x = ttnn.from_torch(ttnn_input_tensor, dtype=ttnn.bfloat8_b, layout=ttnn.TILE_LAYOUT, device=device) - ttnn_x = ttnn.to_layout(ttnn_x, ttnn.ROW_MAJOR_LAYOUT) - ttnn_x = ttnn.reshape(ttnn_x, (1, 28, 28, 64)) - ttnn_y1 = ttnn_x[:, :, :, :32] - ttnn_y2 = ttnn_x[:, :, :, 32:64] - - assert_with_pcc(torch_y1, ttnn.to_torch(ttnn_y1).reshape(1, 28, 28, 32).permute(0, 3, 1, 2), 0.99999) - - -@pytest.mark.parametrize("device_params", [{"l1_small_size": 16384}], indirect=True) -def test_dtype_issue(device): - a = torch.randn((1, 256, 1, 49), dtype=torch.bfloat16) - a_ttnn = ttnn.from_torch(a, layout=ttnn.TILE_LAYOUT, dtype=ttnn.bfloat8_b) - print("bfp8", a_ttnn.dtype) - a_ttnn = ttnn.to_layout(a_ttnn, layout=ttnn.TILE_LAYOUT, dtype=ttnn.bfloat16) - print("bfp16", a_ttnn.dtype) - ttnn_output = ttnn.to_torch(a_ttnn) - assert_with_pcc(a, ttnn_output, 0.99999) diff --git a/models/experimental/functional_yolov11/test/test_yolov11_demo.py b/models/experimental/functional_yolov11/test/test_yolov11_demo.py deleted file mode 100644 index 
0ea487f6980..00000000000 --- a/models/experimental/functional_yolov11/test/test_yolov11_demo.py +++ /dev/null @@ -1,87 +0,0 @@ -# SPDX-FileCopyrightText: © 2025 Tenstorrent Inc. - -# SPDX-License-Identifier: Apache-2.0 - -import pytest -import ttnn -import torch -from tests.ttnn.utils_for_testing import assert_with_pcc -import sys - -from models.experimental.functional_yolov11.reference import yolov11 - -from models.experimental.functional_yolov11.tt.model_preprocessing import ( - create_yolov11_input_tensors, - create_yolov11_model_parameters, -) -from models.experimental.functional_yolov11.tt import ttnn_yolov11 -import torch.nn as nn - -try: - sys.modules["ultralytics"] = yolov11 - sys.modules["ultralytics.nn.tasks"] = yolov11 - sys.modules["ultralytics.nn.modules.conv"] = yolov11 - sys.modules["ultralytics.nn.modules.block"] = yolov11 - sys.modules["ultralytics.nn.modules.head"] = yolov11 - -except KeyError: - print("models.experimental.functional_yolov11.reference.yolov11 not found.") - - -class Ensemble(nn.ModuleList): - def __init__(self): - super(Ensemble, self).__init__() - - def forward(self, x, augment=False): - y = [] - for module in self: - y.append(module(x, augment)[0]) - y = torch.cat(y, 1) - return y, None - - -def attempt_load(weights, map_location=None): - model = Ensemble() - for w in weights if isinstance(weights, list) else [weights]: - w = "models/experimental/functional_yolov11/reference/yolo11n.pt" - ckpt = torch.load(w, map_location=map_location) - model.append(ckpt["ema" if ckpt.get("ema") else "model"].float().eval()) - for m in model.modules(): - if type(m) in [nn.Hardswish, nn.LeakyReLU, nn.ReLU, nn.ReLU6, nn.SiLU]: - m.inplace = True - elif type(m) is nn.Upsample: - m.recompute_scale_factor = None - - if len(model) == 1: - return model[-1] - else: - for k in ["names", "stride"]: - setattr(model, k, getattr(model[-1], k)) - return model - - -@pytest.mark.parametrize("device_params", [{"l1_small_size": 79104}], indirect=True) -def 
test_yolov11(device, use_program_cache, reset_seeds): - torch_input, ttnn_input = create_yolov11_input_tensors(device) - - torch_model = attempt_load("yolov11n.pt", map_location="cpu") - state_dict = torch_model.state_dict() - torch_model = yolov11.YoloV11() - ds_state_dict = {k: v for k, v in state_dict.items()} - new_state_dict = {} - for (name1, parameter1), (name2, parameter2) in zip(torch_model.state_dict().items(), ds_state_dict.items()): - if isinstance(parameter2, torch.FloatTensor): - new_state_dict[name1] = parameter2 - torch_model.load_state_dict(new_state_dict) - torch_model.eval() - - torch_output = torch_model(torch_input) - - parameters = create_yolov11_model_parameters(torch_model, torch_input, device=device) - ttnn_model = ttnn_yolov11.YoloV11(device, parameters) - ttnn_output = ttnn_model(ttnn_input) - - ttnn_output = ttnn.to_torch(ttnn_output) - ttnn_output = ttnn_output.permute(0, 3, 1, 2) - ttnn_output = ttnn_output.reshape(torch_output.shape) - assert_with_pcc(torch_output, ttnn_output, 0.99999) diff --git a/models/experimental/functional_yolov11/tests/test_yolov11.py b/models/experimental/functional_yolov11/tests/test_yolov11.py new file mode 100644 index 00000000000..6bd1eeb76dd --- /dev/null +++ b/models/experimental/functional_yolov11/tests/test_yolov11.py @@ -0,0 +1,125 @@ +# SPDX-FileCopyrightText: © 2025 Tenstorrent Inc. 
+ +# SPDX-License-Identifier: Apache-2.0 + +import sys +import ttnn +import time +import torch +import pytest +import torch.nn as nn +from loguru import logger +from models.utility_functions import is_wormhole_b0 +from models.perf.perf_utils import prep_perf_report +from models.experimental.functional_yolov11.tt import ttnn_yolov11 +from models.experimental.functional_yolov11.reference import yolov11 +from models.experimental.functional_yolov11.reference.yolov11 import attempt_load +from models.utility_functions import enable_persistent_kernel_cache, disable_persistent_kernel_cache +from models.perf.device_perf_utils import run_device_perf, check_device_perf, prep_device_perf_report +from models.experimental.functional_yolov11.tt.model_preprocessing import ( + create_yolov11_input_tensors, + create_yolov11_model_parameters, +) + +try: + sys.modules["ultralytics"] = yolov11 + sys.modules["ultralytics.nn.tasks"] = yolov11 + sys.modules["ultralytics.nn.modules.conv"] = yolov11 + sys.modules["ultralytics.nn.modules.block"] = yolov11 + sys.modules["ultralytics.nn.modules.head"] = yolov11 + +except KeyError: + print("models.experimental.functional_yolov11.reference.yolov11 not found.") + + +def get_expected_times(name): + base = {"yolov11": (130.70, 0.594)} + return base[name] + + +@pytest.mark.models_performance_bare_metal +@pytest.mark.parametrize("device_params", [{"l1_small_size": 32768}], indirect=True) +@pytest.mark.parametrize("batch_size", [(1)]) +@pytest.mark.parametrize("input_tensor", [torch.rand((1, 3, 640, 640))], ids=["input_tensor"]) +def test_yolov11(device, input_tensor, batch_size): + disable_persistent_kernel_cache() + torch_input, ttnn_input = create_yolov11_input_tensors( + device, + batch=input_tensor.shape[0], + input_channels=input_tensor.shape[1], + input_height=input_tensor.shape[2], + input_width=input_tensor.shape[3], + ) + torch_model = attempt_load("yolo11n.pt", map_location="cpu") + state_dict = torch_model.state_dict() + torch_model = 
yolov11.YoloV11() + ds_state_dict = {k: v for k, v in state_dict.items()} + new_state_dict = {} + for (name1, parameter1), (name2, parameter2) in zip(torch_model.state_dict().items(), ds_state_dict.items()): + if isinstance(parameter2, torch.FloatTensor): + new_state_dict[name1] = parameter2 + torch_model.load_state_dict(new_state_dict) + torch_model.eval() + parameters = create_yolov11_model_parameters(torch_model, torch_input, device=device) + model = ttnn_yolov11.YoloV11(device, parameters) + durations = [] + + for i in range(2): + start = time.time() + ttnn_model_output = model(ttnn_input) + end = time.time() + durations.append(end - start) + ttnn.deallocate(ttnn_model_output) + enable_persistent_kernel_cache() + + inference_and_compile_time, inference_time, *_ = durations + + expected_compile_time, expected_inference_time = get_expected_times("yolov11") + + prep_perf_report( + model_name="models/experimental/functional_yolov11", + batch_size=batch_size, + inference_and_compile_time=inference_and_compile_time, + inference_time=inference_time, + expected_compile_time=expected_compile_time, + expected_inference_time=expected_inference_time, + comments="", + inference_time_cpu=0.0, + ) + + logger.info(f"Compile time: {inference_and_compile_time - inference_time}") + logger.info(f"Inference time: {inference_time}") + logger.info(f"Samples per second: {1 / inference_time * batch_size}") + + +@pytest.mark.parametrize( + "batch_size, expected_perf", + [ + [1, 81.94], + ], +) +@pytest.mark.models_device_performance_bare_metal +def test_perf_device_bare_metal_yolov11(batch_size, expected_perf): + subdir = "ttnn_yolov11" + num_iterations = 1 + margin = 0.03 + expected_perf = expected_perf if is_wormhole_b0() else 0 + + command = f"pytest models/experimental/functional_yolov11/demo/demo.py::test_demo" + cols = ["DEVICE FW", "DEVICE KERNEL", "DEVICE BRISC KERNEL"] + + inference_time_key = "AVG DEVICE KERNEL SAMPLES/S" + expected_perf_cols = {inference_time_key: 
expected_perf} + + post_processed_results = run_device_perf(command, subdir, num_iterations, cols, batch_size) + expected_results = check_device_perf(post_processed_results, margin, expected_perf_cols) + + logger.info(f"{expected_results}") + + prep_device_perf_report( + model_name=f"ttnn_functional_yolov11{batch_size}", + batch_size=batch_size, + post_processed_results=post_processed_results, + expected_results=expected_results, + comments="", + ) diff --git a/models/experimental/functional_yolov11/test/test_yolov11_perfomant.py b/models/experimental/functional_yolov11/tests/test_yolov11_perfomant.py similarity index 91% rename from models/experimental/functional_yolov11/test/test_yolov11_perfomant.py rename to models/experimental/functional_yolov11/tests/test_yolov11_perfomant.py index de99d2d28f6..10658d8ee58 100644 --- a/models/experimental/functional_yolov11/test/test_yolov11_perfomant.py +++ b/models/experimental/functional_yolov11/tests/test_yolov11_perfomant.py @@ -3,11 +3,9 @@ # SPDX-License-Identifier: Apache-2.0 import pytest -import ttnn -import torch from tests.ttnn.utils_for_testing import assert_with_pcc from models.utility_functions import run_for_wormhole_b0 -from models.experimental.functional_yolov11.test.yolov11_perfomant import ( +from models.experimental.functional_yolov11.tests.yolov11_perfomant import ( run_yolov11_trace_inference, run_yolov11_trace_2cqs_inference, ) diff --git a/models/experimental/functional_yolov11/test/yolov11_perfomant.py b/models/experimental/functional_yolov11/tests/yolov11_perfomant.py similarity index 96% rename from models/experimental/functional_yolov11/test/yolov11_perfomant.py rename to models/experimental/functional_yolov11/tests/yolov11_perfomant.py index 3ed329902fc..fd119146196 100644 --- a/models/experimental/functional_yolov11/test/yolov11_perfomant.py +++ b/models/experimental/functional_yolov11/tests/yolov11_perfomant.py @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: © 2024 Tenstorrent Inc. 
+# SPDX-FileCopyrightText: © 2025 Tenstorrent Inc. # SPDX-License-Identifier: Apache-2.0 @@ -6,7 +6,7 @@ import torch import ttnn from models.utility_functions import is_wormhole_b0, profiler -from models.experimental.functional_yolov11.test.yolov11_test_infra import create_test_infra +from models.experimental.functional_yolov11.tests.yolov11_test_infra import create_test_infra try: from tracy import signpost @@ -56,7 +56,6 @@ def run_yolov11_trace_inference( trace_input_addr = ttnn.buffer_address(test_infra.input_tensor) tid = ttnn.begin_trace_capture(device, cq_id=0) test_infra.run() - print("run3") tt_image_res = ttnn.allocate_tensor_on_device(spec, device) ttnn.end_trace_capture(device, tid, cq_id=0) assert trace_input_addr == ttnn.buffer_address(tt_image_res) diff --git a/models/experimental/functional_yolov11/test/yolov11_test_infra.py b/models/experimental/functional_yolov11/tests/yolov11_test_infra.py similarity index 79% rename from models/experimental/functional_yolov11/test/yolov11_test_infra.py rename to models/experimental/functional_yolov11/tests/yolov11_test_infra.py index 71fc4e50019..09edeebc7c9 100644 --- a/models/experimental/functional_yolov11/test/yolov11_test_infra.py +++ b/models/experimental/functional_yolov11/tests/yolov11_test_infra.py @@ -1,17 +1,14 @@ -# SPDX-FileCopyrightText: © 2024 Tenstorrent Inc. +# SPDX-FileCopyrightText: © 2025 Tenstorrent Inc. 
# SPDX-License-Identifier: Apache-2.0 from loguru import logger -import os -import pytest import torch -import torchvision from tests.ttnn.utils_for_testing import assert_with_pcc import ttnn from models.experimental.functional_yolov11.reference import yolov11 -from models.experimental.functional_yolov11.reference.yolov11 import YoloV11 as torch_yolov11 -from models.experimental.functional_yolov11.tt.ttnn_yolov11 import YoloV11 as ttnn_yolov11 +from models.experimental.functional_yolov11.tt import ttnn_yolov11 +from models.experimental.functional_yolov11.reference.yolov11 import attempt_load import sys from models.utility_functions import ( is_wormhole_b0, @@ -35,38 +32,6 @@ print("models.experimental.functional_yolov11.reference.yolov11 not found.") -class Ensemble(nn.ModuleList): - def __init__(self): - super(Ensemble, self).__init__() - - def forward(self, x, augment=False): - y = [] - for module in self: - y.append(module(x, augment)[0]) - y = torch.cat(y, 1) - return y, None - - -def attempt_load(weights, map_location=None): - model = Ensemble() - for w in weights if isinstance(weights, list) else [weights]: - w = "models/experimental/functional_yolov11/reference/yolo11n.pt" - ckpt = torch.load(w, map_location=map_location) - model.append(ckpt["ema" if ckpt.get("ema") else "model"].float().eval()) - for m in model.modules(): - if type(m) in [nn.Hardswish, nn.LeakyReLU, nn.ReLU, nn.ReLU6, nn.SiLU]: - m.inplace = True - elif type(m) is nn.Upsample: - m.recompute_scale_factor = None - - if len(model) == 1: - return model[-1] - else: - for k in ["names", "stride"]: - setattr(model, k, getattr(model[-1], k)) - return model - - def load_yolov11_model(): torch_model = attempt_load("yolov11n.pt", map_location="cpu") state_dict = torch_model.state_dict() @@ -97,7 +62,7 @@ def __init__( torch_model = load_yolov11_model() parameters = create_yolov11_model_parameters(torch_model, self.torch_input, device=device) self.torch_output = torch_model(self.torch_input) - 
self.ttnn_yolov11_model = ttnn_yolov11(device, parameters) + self.ttnn_yolov11_model = ttnn_yolov11.YoloV11(device, parameters) def run(self): self.output_tensor = self.ttnn_yolov11_model(self.input_tensor) @@ -150,7 +115,6 @@ def setup_dram_sharded_input(self, device, torch_input_tensor=None, mesh_mapper= def validate(self, output_tensor=None): output_tensor = self.output_tensor if output_tensor is None else output_tensor output_tensor = ttnn.to_torch(self.output_tensor) - # output_tensor = torch.permute(output_tensor, (0, 3, 1, 2)) output_tensor = output_tensor.reshape((self.torch_output).shape) valid_pcc = 0.98 diff --git a/models/experimental/functional_yolov11/tt/model_preprocessing.py b/models/experimental/functional_yolov11/tt/model_preprocessing.py index 6a042b9f12a..5ac679d9900 100644 --- a/models/experimental/functional_yolov11/tt/model_preprocessing.py +++ b/models/experimental/functional_yolov11/tt/model_preprocessing.py @@ -1,5 +1,6 @@ # SPDX-FileCopyrightText: © 2025 Tenstorrent Inc. 
-# SPDX-License-Identifier: Apache-2.0] + +# SPDX-License-Identifier: Apache-2.0 import torch import ttnn @@ -11,7 +12,6 @@ def create_yolov11_input_tensors(device, batch=1, input_channels=3, input_height=224, input_width=224): - # torch.manual_seed(20) torch_input_tensor = torch.randn(batch, input_channels, input_height, input_width) ttnn_input_tensor = torch.permute(torch_input_tensor, (0, 2, 3, 1)) ttnn_input_tensor = ttnn_input_tensor.reshape( @@ -39,12 +39,8 @@ def make_anchors(device, feats, strides, grid_cell_offset=0.5): b = torch.cat(stride_tensor).transpose(0, 1) return ( - ttnn.from_torch( - a, dtype=ttnn.bfloat16, layout=ttnn.TILE_LAYOUT, device=device, memory_config=ttnn.L1_MEMORY_CONFIG - ), - ttnn.from_torch( - b, dtype=ttnn.bfloat16, layout=ttnn.TILE_LAYOUT, device=device, memory_config=ttnn.L1_MEMORY_CONFIG - ), + ttnn.from_torch(a, dtype=ttnn.bfloat16, layout=ttnn.TILE_LAYOUT, device=device), + ttnn.from_torch(b, dtype=ttnn.bfloat16, layout=ttnn.TILE_LAYOUT, device=device), ) @@ -97,17 +93,6 @@ def __delattr__(self, key): def preprocess(d: dict, weights_path: str, bias_path: str): - """ - Accesses a tensor within a nested dictionary using a path string. - - Args: - d: The dictionary containing the nested structure. - path: A string representing the path to the tensor, - e.g., "conv1.module.conv.weight". - - Returns: - The tensor found at the specified path, or None if not found. - """ tt_bias = None weight_keys = weights_path.split(".") bias_keys = bias_path.split(".") @@ -162,7 +147,7 @@ def create_yolov11_model_parameters(model: YoloV11, input_tensor: torch.Tensor, input_tensor.shape[3] // 8, input_tensor.shape[3] // 16, input_tensor.shape[3] // 32, - ] # Values depends on input resolution. 
Current: 224x224 + ] strides = [8.0, 16.0, 32.0] anchors, strides = make_anchors(device, feats, strides) # Optimization: Processing make anchors outside model run @@ -187,7 +172,7 @@ def create_yolov11_model_parameters_detect( model=model, run_model=lambda model: model(input_tensor_1, input_tensor_2, input_tensor_3), device=None ) - feats = [28, 14, 7] # Values depends on input resolution. Current: 224x224 + feats = [28, 14, 7] strides = [8.0, 16.0, 32.0] anchors, strides = make_anchors(device, feats, strides) # Optimization: Processing make anchors outside model run diff --git a/models/experimental/functional_yolov11/tt/ttnn_yolov11.py b/models/experimental/functional_yolov11/tt/ttnn_yolov11.py index ceabf111b65..72ad8b6b944 100644 --- a/models/experimental/functional_yolov11/tt/ttnn_yolov11.py +++ b/models/experimental/functional_yolov11/tt/ttnn_yolov11.py @@ -1,12 +1,13 @@ # SPDX-FileCopyrightText: © 2025 Tenstorrent Inc. + # SPDX-License-Identifier: Apache-2.0 + import ttnn import math from tt_lib.utils import ( _nearest_y, ) from tests.ttnn.ttnn_utility_fuction import get_shard_grid_from_num_cores -import torch class Yolov11_Conv2D: @@ -107,56 +108,6 @@ def __call__(self, x): return x -def Yolov11_shard_SiLU(device, x, ncores=64): - input_2d_height = x.shape.with_tile_padding()[2] - input_2d_width = x.shape.with_tile_padding()[3] - - input_2d_height_padded = _nearest_y(input_2d_height, ncores * 32) - - shard_height = math.ceil(input_2d_height_padded / ncores) - shard_grid = get_shard_grid_from_num_cores(ncores, device) - shard_width = input_2d_width - shard_orientation = ttnn.ShardOrientation.ROW_MAJOR - tensor_memory_layout = ttnn.TensorMemoryLayout.HEIGHT_SHARDED - - shard_spec = ttnn.ShardSpec(shard_grid, (shard_height, shard_width), shard_orientation, False) - - in_sharded_mem_config = ttnn.MemoryConfig(tensor_memory_layout, ttnn.BufferType.L1, shard_spec) - - x = ttnn.to_memory_config(x, memory_config=in_sharded_mem_config) - - x = ttnn.silu(x, 
memory_config=in_sharded_mem_config) - return x - - -def Yolov11_shard_upsample(device, x): - shard_grid = ttnn.CoreRangeSet( - { - ttnn.CoreRange( - ttnn.CoreCoord(0, 0), - ttnn.CoreCoord(7, 5), - ), - ttnn.CoreRange( - ttnn.CoreCoord(0, 6), - ttnn.CoreCoord(0, 6), - ), - } - ) - shard_height = math.ceil(x.shape[0] * x.shape[1] * x.shape[2] / 49) - shard_width = x.shape[-1] - shard_spec = ttnn.ShardSpec(shard_grid, (shard_height, shard_width), ttnn.ShardOrientation.ROW_MAJOR) - in_sharded_mem_config = ttnn.MemoryConfig(ttnn.TensorMemoryLayout.HEIGHT_SHARDED, ttnn.BufferType.L1, shard_spec) - x = ttnn.to_memory_config(x, memory_config=in_sharded_mem_config) - shard_height_out = shard_height * 2 * 2 # scale_factor=2 - shard_spec_out = ttnn.ShardSpec(shard_grid, (shard_height_out, shard_width), ttnn.ShardOrientation.ROW_MAJOR) - out_sharded_mem_config = ttnn.MemoryConfig( - ttnn.TensorMemoryLayout.HEIGHT_SHARDED, ttnn.types.BufferType.L1, shard_spec_out - ) - x = ttnn.upsample(x, scale_factor=2, memory_config=out_sharded_mem_config) - x = ttnn.sharded_to_interleaved(x, memory_config=ttnn.L1_MEMORY_CONFIG) - return x - - def sharded_concat(input_tensors, num_cores=64, dim=3): # expected input tensors to be in fp16, RM, same (h*w) shard_grid = ttnn.CoreRangeSet({ttnn.CoreRange(ttnn.CoreCoord(0, 0), ttnn.CoreCoord(7, 7))}) in_shard_width = input_tensors[0].shape[-1] @@ -283,7 +234,7 @@ def __call__(self, device, x): k1 = self.k1(device, x1) k2 = self.k2(device, k1) - use_shard_concat = False # fps drop due to layout conversion + use_shard_concat = False if use_shard_concat: x2 = ttnn.to_layout(x2, ttnn.ROW_MAJOR_LAYOUT) x2 = ttnn.to_dtype(x2, ttnn.bfloat16) @@ -537,9 +488,6 @@ def __call__(self, device, y1, y2, y3): y2 = ttnn.concat((x2, x5), -1, memory_config=ttnn.L1_MEMORY_CONFIG) y3 = ttnn.concat((x3, x6), -1, memory_config=ttnn.L1_MEMORY_CONFIG) - y1_reshaped = ttnn.reshape(y1, (y1.shape[0], y1.shape[2], y1.shape[-1])) - y2_reshaped = ttnn.reshape(y2, (y2.shape[0], 
y2.shape[2], y2.shape[-1])) - y3_reshaped = ttnn.reshape(y3, (y3.shape[0], y3.shape[2], y3.shape[-1])) y = ttnn.concat((y1, y2, y3), dim=2, memory_config=ttnn.L1_MEMORY_CONFIG) y = ttnn.squeeze(y, dim=0) ya, yb = y[:, :, :64], y[:, :, 64:144] @@ -561,7 +509,6 @@ def __call__(self, device, y1, y2, y3): c = self.dfl(ya) ttnn.deallocate(ya) c = ttnn.sharded_to_interleaved(c, memory_config=ttnn.L1_MEMORY_CONFIG) - c = ttnn.to_layout(c, layout=ttnn.ROW_MAJOR_LAYOUT) c = ttnn.permute(c, (0, 3, 1, 2)) c = ttnn.reshape(c, (c.shape[0], 1, 4, int(c.shape[3] / 4))) @@ -569,6 +516,8 @@ def __call__(self, device, y1, y2, y3): c1, c2 = c[:, :2, :], c[:, 2:4, :] anchor, strides = self.anchors, self.strides + anchor = ttnn.to_memory_config(anchor, memory_config=ttnn.L1_MEMORY_CONFIG) + strides = ttnn.to_memory_config(strides, memory_config=ttnn.L1_MEMORY_CONFIG) c1 = ttnn.to_layout(c1, layout=ttnn.TILE_LAYOUT) c2 = ttnn.to_layout(c2, layout=ttnn.TILE_LAYOUT) @@ -659,7 +608,6 @@ def __call__(self, x): x = self.c2psa(self.device, x) x10 = x x = ttnn.to_layout(x, layout=ttnn.ROW_MAJOR_LAYOUT) - # x = ttnn.to_dtype(x, ttnn.bfloat16) x = ttnn.reshape(x, (x.shape[0], int(math.sqrt(x.shape[2])), int(math.sqrt(x.shape[2])), x.shape[3])) nhw = x.shape[0] * x.shape[1] * x.shape[2] num_cores = determine_num_cores_for_upsample(nhw, x.shape[2]) @@ -671,17 +619,12 @@ def __call__(self, x): x = ttnn.reshard(x, shardspec) else: x = ttnn.interleaved_to_sharded(x, shardspec) - x = ttnn.upsample(x, scale_factor=2, memory_config=x.memory_config()) # 11 if x.is_sharded(): x = ttnn.sharded_to_interleaved(x, memory_config=ttnn.L1_MEMORY_CONFIG) x = ttnn.reshape(x, (1, 1, x.shape[0] * x.shape[1] * x.shape[2], x.shape[3])) x6 = ttnn.to_layout(x6, layout=ttnn.ROW_MAJOR_LAYOUT) - - # x = sharded_concat([x,x6]) # unequal channels( sharded_concat is not applicable) shard_height = (x[0].shape[2] + 64 - 1) // 64 - print("shard height is ", shard_height) - print("x and x6 sahpes are", x.shape, x6.shape) 
input_sharded_memory_config_1 = ttnn.create_sharded_memory_config( (shard_height, x.shape[-1]), core_grid=ttnn.CoreRangeSet({ttnn.CoreRange(ttnn.CoreCoord(0, 0), ttnn.CoreCoord(7, 7))}), @@ -694,22 +637,19 @@ def __call__(self, x): strategy=ttnn.ShardStrategy.HEIGHT, use_height_and_width_as_shard_shape=True, ) - # x = ttnn.to_memory_config(x,input_sharded_memory_config_1) - # x6 = ttnn.to_memory_config(x6,input_sharded_memory_config_2) + x = ttnn.to_memory_config(x, input_sharded_memory_config_1) + x6 = ttnn.to_memory_config(x6, input_sharded_memory_config_2) out_sharded_memory_config_ = ttnn.create_sharded_memory_config( (shard_height, x.shape[-1] + x6.shape[-1]), core_grid=ttnn.CoreRangeSet({ttnn.CoreRange(ttnn.CoreCoord(0, 0), ttnn.CoreCoord(7, 7))}), strategy=ttnn.ShardStrategy.HEIGHT, use_height_and_width_as_shard_shape=True, ) - x = ttnn.concat((x, x6), -1, memory_config=ttnn.L1_MEMORY_CONFIG) + x = ttnn.concat((x, x6), -1, memory_config=out_sharded_memory_config_) ttnn.deallocate(x6) - # if x.shape[2]==196: - # x = ttnn.sharded_to_interleaved(x, memory_config=ttnn.L1_MEMORY_CONFIG) - # x = ttnn.to_layout(x, layout=ttnn.TILE_LAYOUT) - print(" after x and x6 concat", x.shape) - # return x + if x.shape[2] == 196: + x = ttnn.sharded_to_interleaved(x, memory_config=ttnn.L1_MEMORY_CONFIG) x = self.c3k2_5(self.device, x) # 13 x13 = x x = ttnn.to_layout(x, layout=ttnn.ROW_MAJOR_LAYOUT) @@ -730,26 +670,16 @@ def __call__(self, x): x = ttnn.reshape(x, (1, 1, x.shape[0] * x.shape[1] * x.shape[2], x.shape[3])) x4 = ttnn.to_layout(x4, layout=ttnn.ROW_MAJOR_LAYOUT) x = sharded_concat([x, x4]) - # x = ttnn.concat((x, x4), -1, memory_config=ttnn.L1_MEMORY_CONFIG) # 15 ttnn.deallocate(x4) - # x = ttnn.to_layout(x, layout=ttnn.TILE_LAYOUT) x = self.c3k2_6(self.device, x) # 16 x16 = x x = self.conv7(self.device, x) # 17 - # x = ttnn.to_layout(x, layout=ttnn.ROW_MAJOR_LAYOUT) - # x = ttnn.to_dtype(x, ttnn.bfloat16) - # x = ttnn.to_layout(x, layout=ttnn.TILE_LAYOUT) - # print("x 
and x13 shapes are", x.shape, x13.shape, x.dtype, x13.dtype, x.layout, x13.layout) x = ttnn.concat((x, x13), -1, memory_config=ttnn.L1_MEMORY_CONFIG) # 18 ttnn.deallocate(x13) x = self.c3k2_7(self.device, x) # 19 x19 = x - x = self.conv8(self.device, x) # 20 #16 - # x = ttnn.to_layout(x, layout=ttnn.ROW_MAJOR_LAYOUT) - # x = ttnn.to_dtype(x, ttnn.bfloat16) - print("x and x10 shapes are", x.shape, x10.shape, x.dtype, x10.dtype, x.layout, x10.layout) + x = self.conv8(self.device, x) x = ttnn.concat((x, x10), -1, memory_config=ttnn.L1_MEMORY_CONFIG) # 21 - print("output cncat shape is", x.shape) ttnn.deallocate(x10) x = self.c3k2_8(self.device, x) # 22 x22 = x diff --git a/tests/scripts/run_performance.sh b/tests/scripts/run_performance.sh index bd19f0a398f..326312531e2 100755 --- a/tests/scripts/run_performance.sh +++ b/tests/scripts/run_performance.sh @@ -23,6 +23,9 @@ run_perf_models_other() { env WH_ARCH_YAML=wormhole_b0_80_arch_eth_dispatch.yaml pytest models/demos/yolov4/tests/test_perf_yolo.py -m $test_marker env WH_ARCH_YAML=wormhole_b0_80_arch_eth_dispatch.yaml pytest models/demos/wormhole/distilbert/tests/test_perf_distilbert.py -m $test_marker + + #yolov11 + env WH_ARCH_YAML=wormhole_b0_80_arch_eth_dispatch.yaml pytest models/experimental/functional_yolov11/tests/test_yolov11.py -m $test_marker fi env pytest -n auto tests/ttnn/integration_tests/bert/test_performance.py -m $test_marker @@ -103,6 +106,7 @@ run_device_perf_models() { env pytest models/demos/roberta/tests/ -m $test_marker + if [ "$tt_arch" == "grayskull" ]; then #TODO(MO): Until #6560 is fixed, GS device profiler test are grouped with #Model Device perf regression tests to make sure thy run on no-soft-reset BMs @@ -133,6 +137,8 @@ run_device_perf_models() { env WH_ARCH_YAML=wormhole_b0_80_arch_eth_dispatch.yaml pytest models/demos/yolov4/tests/ -m $test_marker env WH_ARCH_YAML=wormhole_b0_80_arch_eth_dispatch.yaml pytest models/demos/wormhole/distilbert/tests -m $test_marker + + env 
WH_ARCH_YAML=wormhole_b0_80_arch_eth_dispatch.yaml pytest models/experimental/functional_yolov11/tests/ -m $test_marker fi ## Merge all the generated reports diff --git a/tests/scripts/run_python_model_tests.sh b/tests/scripts/run_python_model_tests.sh index 576ef139fc7..0506dfbb2a8 100755 --- a/tests/scripts/run_python_model_tests.sh +++ b/tests/scripts/run_python_model_tests.sh @@ -43,6 +43,9 @@ run_python_model_tests_wormhole_b0() { # Mamba WH_ARCH_YAML=wormhole_b0_80_arch_eth_dispatch.yaml pytest -svv models/demos/wormhole/mamba/tests/test_residual_block.py -k "pretrained_weight_false" + # Yolov11 + WH_ARCH_YAML=wormhole_b0_80_arch_eth_dispatch.yaml pytest tests/ttnn/integration_tests/yolov11/test_ttnn_yolo_v11.py + # Llama3.1-8B llama8b=/mnt/MLPerf/tt_dnn-models/llama/Meta-Llama-3.1-8B-Instruct/ # Llama3.2-1B diff --git a/tests/scripts/single_card/run_single_card_demo_tests.sh b/tests/scripts/single_card/run_single_card_demo_tests.sh index e7a8e492122..e85fe1044f5 100755 --- a/tests/scripts/single_card/run_single_card_demo_tests.sh +++ b/tests/scripts/single_card/run_single_card_demo_tests.sh @@ -61,6 +61,9 @@ run_common_func_tests() { #RoBERTa pytest --disable-warnings models/demos/roberta/demo/demo.py --timeout 600; fail+=$? + # Yolov11 + WH_ARCH_YAML=wormhole_b0_80_arch_eth_dispatch.yaml pytest --disable-warnings models/experimental/functional_yolov11/demo/demo.py --timeout 600; fail+=$? + return $fail } diff --git a/tests/ttnn/integration_tests/yolov11/test_ttnn_bottleneck.py b/tests/ttnn/integration_tests/yolov11/test_ttnn_bottleneck.py deleted file mode 100644 index 3aee44b8e84..00000000000 --- a/tests/ttnn/integration_tests/yolov11/test_ttnn_bottleneck.py +++ /dev/null @@ -1,59 +0,0 @@ -# SPDX-FileCopyrightText: © 2025 Tenstorrent Inc. 
-# SPDX-License-Identifier: Apache-2.0 -import pytest -import ttnn -from tests.ttnn.utils_for_testing import assert_with_pcc -from models.experimental.functional_yolov11.tt.model_preprocessing import ( - create_yolov11_input_tensors, - create_yolov11_model_parameters, -) -from models.experimental.functional_yolov11.reference.yolov11 import Bottleneck as torch_bottleneck -from models.experimental.functional_yolov11.tt.ttnn_yolov11 import Bottleneck as ttnn_bottleneck - - -@pytest.mark.parametrize( - "in_channel, out_channel, kernel, stride, padding, dilation, groups,fwd_input_shape", - [ - ([16, 8], [8, 16], [3, 3], [1, 1], [1, 1], [1, 1], [1, 1], [1, 16, 56, 56]), # 1 - ([32, 16], [16, 32], [3, 3], [1, 1], [1, 1], [1, 1], [1, 1], [1, 32, 38, 28]), # 2 - ([32, 32], [32, 32], [3, 3], [1, 1], [1, 1], [1, 1], [1, 1], [1, 32, 14, 14]), # 3 - ([64, 64], [64, 64], [3, 3], [1, 1], [1, 1], [1, 1], [1, 1], [1, 64, 7, 7]), - ([64, 32], [32, 64], [3, 3], [1, 1], [1, 1], [1, 1], [1, 1], [1, 64, 14, 14]), - ([32, 16], [16, 32], [3, 3], [1, 1], [1, 1], [1, 1], [1, 1], [1, 32, 28, 28]), - ([64, 32], [32, 64], [3, 3], [1, 1], [1, 1], [1, 1], [1, 1], [1, 64, 14, 14]), - ([64, 64], [64, 64], [3, 3], [1, 1], [1, 1], [1, 1], [1, 1], [1, 64, 7, 7]), - ], -) -@pytest.mark.parametrize("device_params", [{"l1_small_size": 79104}], indirect=True) -def test_yolo_v11_bottleneck( - device, - use_program_cache, - reset_seeds, - in_channel, - out_channel, - kernel, - stride, - padding, - dilation, - groups, - fwd_input_shape, -): - torch_module = torch_bottleneck(in_channel, out_channel, kernel, stride, padding, dilation, groups) - torch_module.eval() - torch_input, ttnn_input = create_yolov11_input_tensors( - device, - batch=fwd_input_shape[0], - input_channels=fwd_input_shape[1], - input_height=fwd_input_shape[2], - input_width=fwd_input_shape[3], - ) - ttnn_input = ttnn.to_device(ttnn_input, device=device) - ttnn_input = ttnn.to_layout(ttnn_input, layout=ttnn.TILE_LAYOUT, 
memory_config=ttnn.L1_MEMORY_CONFIG) - torch_output = torch_module(torch_input) - parameters = create_yolov11_model_parameters(torch_module, torch_input, device=device) - ttnn_module = ttnn_bottleneck(device=device, parameter=parameters.conv_args, conv_pt=parameters) - ttnn_output = ttnn_module(x=ttnn_input, device=device) - ttnn_output = ttnn.to_torch(ttnn_output) - ttnn_output = ttnn_output.permute(0, 3, 1, 2) - ttnn_output = ttnn_output.reshape(torch_output.shape) - assert_with_pcc(torch_output, ttnn_output, 0.99999) diff --git a/tests/ttnn/integration_tests/yolov11/test_ttnn_c2psa.py b/tests/ttnn/integration_tests/yolov11/test_ttnn_c2psa.py deleted file mode 100644 index 25b6bbe79f5..00000000000 --- a/tests/ttnn/integration_tests/yolov11/test_ttnn_c2psa.py +++ /dev/null @@ -1,62 +0,0 @@ -# SPDX-FileCopyrightText: © 2025 Tenstorrent Inc. -# SPDX-License-Identifier: Apache-2.0 -import pytest -import ttnn -import torch -from tests.ttnn.utils_for_testing import assert_with_pcc -from models.experimental.functional_yolov11.tt.model_preprocessing import ( - create_yolov11_input_tensors, - create_yolov11_model_parameters, -) -from models.experimental.functional_yolov11.reference.yolov11 import C2PSA as torch_c2psa_block -from models.experimental.functional_yolov11.tt.ttnn_yolov11 import C2PSA as ttnn_c2psa_block - - -@pytest.mark.parametrize( - "in_channel, out_channel, kernel, stride, padding, dilation, groups,fwd_input_shape", - [ - ( - [256, 256, 128, 128, 128, 128, 256], - [256, 256, 256, 128, 128, 256, 128], - [1, 1, 1, 1, 3, 1, 1], - [1, 1, 1, 1, 1, 1, 1], - [0, 0, 0, 0, 1, 0, 0], - [1, 1, 1, 1, 1, 1, 1], - [1, 1, 1, 1, 128, 1, 1], - [1, 256, 7, 7], - ), - ], -) -@pytest.mark.parametrize("device_params", [{"l1_small_size": 79104}], indirect=True) -def test_yolo_v11_c2psa_block( - device, - use_program_cache, - reset_seeds, - in_channel, - out_channel, - kernel, - stride, - padding, - dilation, - groups, - fwd_input_shape, -): - torch_module = 
torch_c2psa_block(in_channel, out_channel, kernel, stride, padding, dilation, groups) - torch_module.eval() - torch_input, ttnn_input = create_yolov11_input_tensors( - device, - batch=fwd_input_shape[0], - input_channels=fwd_input_shape[1], - input_height=fwd_input_shape[2], - input_width=fwd_input_shape[3], - ) - ttnn_input = ttnn.to_device(ttnn_input, device=device) - ttnn_input = ttnn.to_layout(ttnn_input, layout=ttnn.TILE_LAYOUT, memory_config=ttnn.L1_MEMORY_CONFIG) - torch_output = torch_module(torch_input) - parameters = create_yolov11_model_parameters(torch_module, torch_input, device=device) - ttnn_module = ttnn_c2psa_block(device=device, parameter=parameters.conv_args, conv_pt=parameters) - ttnn_output = ttnn_module(x=ttnn_input, device=device) - ttnn_output = ttnn.to_torch(ttnn_output) - ttnn_output = ttnn_output.permute(0, 3, 1, 2) - ttnn_output = ttnn_output.reshape(torch_output.shape) - assert_with_pcc(torch_output, ttnn_output, 0.99999) diff --git a/tests/ttnn/integration_tests/yolov11/test_ttnn_c3k.py b/tests/ttnn/integration_tests/yolov11/test_ttnn_c3k.py deleted file mode 100644 index 7db507db01f..00000000000 --- a/tests/ttnn/integration_tests/yolov11/test_ttnn_c3k.py +++ /dev/null @@ -1,73 +0,0 @@ -# SPDX-FileCopyrightText: © 2025 Tenstorrent Inc. 
-# SPDX-License-Identifier: Apache-2.0 - -import pytest -import ttnn -import torch -from tests.ttnn.utils_for_testing import assert_with_pcc -from models.experimental.functional_yolov11.tt.model_preprocessing import ( - create_yolov11_input_tensors, - create_yolov11_model_parameters, -) -from models.experimental.functional_yolov11.reference.yolov11 import C3k as torch_c3k -from models.experimental.functional_yolov11.tt.ttnn_yolov11 import C3K as ttnn_c3k - - -@pytest.mark.parametrize( - "in_channel, out_channel, kernel, stride, padding, dilation, groups,fwd_input_shape", - [ - ( - [64, 64, 64, 32, 32, 32, 32], - [32, 32, 64, 32, 32, 32, 32], - [1, 1, 1, 3, 3, 3, 3], - [1, 1, 1, 1, 1, 1, 1], - [0, 0, 0, 1, 1, 1, 1], - [1, 1, 1, 1, 1, 1, 1], - [1, 1, 1, 1, 1, 1, 1], - [1, 64, 14, 14], - ), - # ( - # [128, 128, 128, 64, 64, 64, 64], - # [64, 64, 128, 64, 64, 64, 64], - # [1, 1, 1, 3, 3, 3, 3], - # [1, 1, 1, 1, 1, 1, 1], - # [0, 0, 0, 1, 1, 1, 1], - # [1, 1, 1, 1, 1, 1, 1], - # [1, 1, 1, 1, 1, 1, 1], - # [1, 128, 7, 7], - # ), - ], -) -@pytest.mark.parametrize("device_params", [{"l1_small_size": 79104}], indirect=True) -def test_yolo_v11_c3k( - device, - use_program_cache, - reset_seeds, - in_channel, - out_channel, - kernel, - stride, - padding, - dilation, - groups, - fwd_input_shape, -): - torch_module = torch_c3k(in_channel, out_channel, kernel, stride, padding, dilation, groups) - torch_module.eval() - torch_input, ttnn_input = create_yolov11_input_tensors( - device, - batch=fwd_input_shape[0], - input_channels=fwd_input_shape[1], - input_height=fwd_input_shape[2], - input_width=fwd_input_shape[3], - ) - ttnn_input = ttnn.to_device(ttnn_input, device=device) - ttnn_input = ttnn.to_layout(ttnn_input, layout=ttnn.TILE_LAYOUT, memory_config=ttnn.L1_MEMORY_CONFIG) - torch_output = torch_module(torch_input) - parameters = create_yolov11_model_parameters(torch_module, torch_input, device=device) - ttnn_module = ttnn_c3k(device=device, parameter=parameters.conv_args, 
conv_pt=parameters) - ttnn_output = ttnn_module(x=ttnn_input, device=device) # ttnn.Shape([1, 1, 224, 64]) - ttnn_output = ttnn.to_torch(ttnn_output) - ttnn_output = ttnn_output.permute(0, 3, 1, 2) - ttnn_output = ttnn_output.reshape(torch_output.shape) - assert_with_pcc(torch_output, ttnn_output, 0.99999) diff --git a/tests/ttnn/integration_tests/yolov11/test_ttnn_c3k2.py b/tests/ttnn/integration_tests/yolov11/test_ttnn_c3k2.py deleted file mode 100644 index 46523ba7217..00000000000 --- a/tests/ttnn/integration_tests/yolov11/test_ttnn_c3k2.py +++ /dev/null @@ -1,144 +0,0 @@ -# SPDX-FileCopyrightText: © 2025 Tenstorrent Inc. -# SPDX-License-Identifier: Apache-2.0 - -import pytest -import ttnn -import torch -from tests.ttnn.utils_for_testing import assert_with_pcc -from models.experimental.functional_yolov11.tt.model_preprocessing import ( - create_yolov11_input_tensors, - create_yolov11_model_parameters, -) -from models.experimental.functional_yolov11.reference.yolov11 import C3k2 as torch_c3k2 -from models.experimental.functional_yolov11.tt.ttnn_yolov11 import C3k2 as ttnn_c3k2 - - -@pytest.mark.parametrize( - "in_channel, out_channel, kernel, stride, padding, dilation, groups,is_bk_enabled,fwd_input_shape", - [ - # ( - # [32, 48, 16, 8], - # [32, 64, 8, 16], - # [1, 1, 3, 3], - # [1, 1, 1, 1], - # [0, 0, 1, 1], - # [1, 1, 1, 1], - # [1, 1, 1, 1], - # True, - # [1, 32, 56, 56], - # ), - # ( - # [64, 96, 32, 16], - # [64, 128, 16, 32], - # [1, 1, 3, 3], - # [1, 1, 1, 1], - # [0, 0, 1, 1], - # [1, 1, 1, 1], - # [1, 1, 1, 1], - # True, - # [1, 64, 28, 28], - # ), - ( - [128, 192, 64, 64, 64, 32, 32, 32, 32], - [128, 128, 32, 32, 64, 32, 32, 32, 32], - [1, 1, 1, 1, 1, 3, 3, 3, 3], - [1, 1, 1, 1, 1, 1, 1, 1, 1], - [0, 0, 0, 0, 0, 1, 1, 1, 1], - [1, 1, 1, 1, 1, 1, 1, 1, 1], - [1, 1, 1, 1, 1, 1, 1, 1, 1], - False, - [1, 128, 14, 14], - ), - # ( - # [256, 384, 128, 128, 128, 64, 64, 64, 64], - # [256, 256, 64, 64, 128, 64, 64, 64, 64], - # [1, 1, 1, 1, 1, 3, 3, 3, 3], - # 
[1, 1, 1, 1, 1, 1, 1, 1, 1], - # [0, 0, 0, 0, 0, 1, 1, 1, 1], - # [1, 1, 1, 1, 1, 1, 1, 1, 1], - # [1, 1, 1, 1, 1, 1, 1, 1, 1], - # False, - # [1, 256, 7, 7], - # ), - # ( - # [384, 192, 64, 32], - # [128, 128, 32, 64], - # [1, 1, 3, 3], - # [1, 1, 1, 1], - # [0, 0, 1, 1], - # [1, 1, 1, 1], - # [1, 1, 1, 1], - # True, - # [1, 384, 14, 14], - # ), - # ( - # [256, 96, 32, 16], - # [64, 64, 16, 32], - # [1, 1, 3, 3], - # [1, 1, 1, 1], - # [0, 0, 1, 1], - # [1, 1, 1, 1], - # [1, 1, 1, 1], - # True, - # [1, 256, 28, 28], - # ), - # ( - # [192, 192, 64, 32], - # [128, 128, 32, 64], - # [1, 1, 3, 3], - # [1, 1, 1, 1], - # [0, 0, 1, 1], - # [1, 1, 1, 1], - # [1, 1, 1, 1], - # True, - # [1, 192, 14, 14], - # ), - # ( - # [384, 384, 128, 128, 128, 64, 64, 64, 64], - # [256, 256, 64, 64, 128, 64, 64, 64, 64], - # [1, 1, 1, 1, 1, 3, 3, 3, 3], - # [1, 1, 1, 1, 1, 1, 1, 1, 1], - # [0, 0, 0, 0, 0, 1, 1, 1, 1], - # [1, 1, 1, 1, 1, 1, 1, 1, 1], - # [1, 1, 1, 1, 1, 1, 1, 1, 1], - # False, - # [1, 384, 7, 7], - # ), - ], -) -@pytest.mark.parametrize("device_params", [{"l1_small_size": 79104}], indirect=True) -def test_yolo_v11_c3k2( - device, - use_program_cache, - reset_seeds, - in_channel, - out_channel, - kernel, - stride, - padding, - dilation, - groups, - is_bk_enabled, - fwd_input_shape, -): - torch_module = torch_c3k2(in_channel, out_channel, kernel, stride, padding, dilation, groups, is_bk_enabled) - torch_module.eval() - torch_input, ttnn_input = create_yolov11_input_tensors( - device, - batch=fwd_input_shape[0], - input_channels=fwd_input_shape[1], - input_height=fwd_input_shape[2], - input_width=fwd_input_shape[3], - ) - ttnn_input = ttnn.to_device(ttnn_input, device=device, memory_config=ttnn.L1_MEMORY_CONFIG) - # ttnn_input = ttnn.to_layout(ttnn_input, layout=ttnn.TILE_LAYOUT, ) - torch_output = torch_module(torch_input) - parameters = create_yolov11_model_parameters(torch_module, torch_input, device=device) - ttnn_module = ttnn_c3k2( - device=device, 
parameter=parameters.conv_args, conv_pt=parameters, is_bk_enabled=is_bk_enabled - ) - ttnn_output = ttnn_module(x=ttnn_input, device=device) - ttnn_output = ttnn.to_torch(ttnn_output) - ttnn_output = ttnn_output.permute(0, 3, 1, 2) - ttnn_output = ttnn_output.reshape(torch_output.shape) - assert_with_pcc(torch_output, ttnn_output, 0.99999) diff --git a/tests/ttnn/integration_tests/yolov11/test_ttnn_detect.py b/tests/ttnn/integration_tests/yolov11/test_ttnn_detect.py deleted file mode 100644 index 2108bf23eb2..00000000000 --- a/tests/ttnn/integration_tests/yolov11/test_ttnn_detect.py +++ /dev/null @@ -1,86 +0,0 @@ -# SPDX-FileCopyrightText: © 2024 Tenstorrent Inc. -# SPDX-License-Identifier: Apache-2.0 -import pytest -import ttnn -import torch -from tests.ttnn.utils_for_testing import assert_with_pcc -from models.experimental.functional_yolov11.tt.model_preprocessing import ( - create_yolov11_input_tensors, - create_yolov11_model_parameters_detect, - create_yolov11_model_parameters, -) -from models.experimental.functional_yolov11.reference.yolov11 import Detect as torch_detect -from models.experimental.functional_yolov11.tt.ttnn_yolov11 import Detect as ttnn_detect -from ttnn.model_preprocessing import preprocess_model_parameters -import math - - -@pytest.mark.parametrize( - "in_channel, out_channel, kernel, stride, padding, dilation, groups,fwd_input_shape", - [ - ( - [64, 64, 64, 128, 64, 64, 256, 64, 64, 64, 64, 80, 80, 80, 128, 128, 80, 80, 80, 256, 256, 80, 80, 80, 16], - [64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 80, 80, 80, 80, 128, 80, 80, 80, 80, 256, 80, 80, 80, 80, 1], - [3, 3, 1, 3, 3, 1, 3, 3, 1, 3, 1, 3, 1, 1, 3, 1, 3, 1, 1, 3, 1, 3, 1, 1, 1], - [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], - [1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 0, 1, 0, 0, 1, 0, 1, 0, 0, 1, 0, 1, 0, 0, 0], - [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], - [1, 1, 1, 1, 1, 1, 1, 1, 1, 64, 1, 80, 1, 1, 128, 1, 80, 1, 1, 256, 1, 80, 1, 
1, 1], - [[1, 64, 28, 28], [1, 128, 14, 14], [1, 256, 7, 7]], - ), - ], -) -@pytest.mark.parametrize("device_params", [{"l1_small_size": 79104}], indirect=True) -def test_yolo_v11_detect( - device, - use_program_cache, - reset_seeds, - in_channel, - out_channel, - kernel, - stride, - padding, - dilation, - groups, - fwd_input_shape, -): - torch_module = torch_detect(in_channel, out_channel, kernel, stride, padding, dilation, groups) - torch_module.eval() - torch_input_1, ttnn_input_1 = create_yolov11_input_tensors( - device, - batch=fwd_input_shape[0][0], - input_channels=fwd_input_shape[0][1], - input_height=fwd_input_shape[0][2], - input_width=fwd_input_shape[0][3], - ) - torch_input_2, ttnn_input_2 = create_yolov11_input_tensors( - device, - batch=fwd_input_shape[1][0], - input_channels=fwd_input_shape[1][1], - input_height=fwd_input_shape[1][2], - input_width=fwd_input_shape[1][3], - ) - torch_input_3, ttnn_input_3 = create_yolov11_input_tensors( - device, - batch=fwd_input_shape[2][0], - input_channels=fwd_input_shape[2][1], - input_height=fwd_input_shape[2][2], - input_width=fwd_input_shape[2][3], - ) - ttnn_input_1 = ttnn.to_device(ttnn_input_1, device=device) - ttnn_input_1 = ttnn.to_layout(ttnn_input_1, layout=ttnn.TILE_LAYOUT, memory_config=ttnn.L1_MEMORY_CONFIG) - ttnn_input_2 = ttnn.to_device(ttnn_input_2, device=device) - ttnn_input_2 = ttnn.to_layout(ttnn_input_2, layout=ttnn.TILE_LAYOUT, memory_config=ttnn.L1_MEMORY_CONFIG) - ttnn_input_3 = ttnn.to_device(ttnn_input_3, device=device) - ttnn_input_3 = ttnn.to_layout(ttnn_input_3, layout=ttnn.TILE_LAYOUT, memory_config=ttnn.L1_MEMORY_CONFIG) - torch_output = torch_module(torch_input_1, torch_input_2, torch_input_3) - parameters = create_yolov11_model_parameters_detect( - torch_module, torch_input_1, torch_input_2, torch_input_3, device=device - ) - ttnn_module = ttnn_detect(device=device, parameter=parameters.model, conv_pt=parameters) - - ttnn_output = ttnn_module(y1=ttnn_input_1, y2=ttnn_input_2, 
y3=ttnn_input_3, device=device) - ttnn_output = ttnn.to_torch(ttnn_output) - # ttnn_output = ttnn_output.permute(0, 2, 1) - ttnn_output = ttnn_output.reshape(torch_output.shape) - assert_with_pcc(torch_output, ttnn_output, 0.99999) diff --git a/tests/ttnn/integration_tests/yolov11/test_ttnn_psa_block.py b/tests/ttnn/integration_tests/yolov11/test_ttnn_psa_block.py deleted file mode 100644 index c2643318abf..00000000000 --- a/tests/ttnn/integration_tests/yolov11/test_ttnn_psa_block.py +++ /dev/null @@ -1,62 +0,0 @@ -# SPDX-FileCopyrightText: © 2025 Tenstorrent Inc. -# SPDX-License-Identifier: Apache-2.0 -import pytest -import ttnn -import torch -from tests.ttnn.utils_for_testing import assert_with_pcc -from models.experimental.functional_yolov11.tt.model_preprocessing import ( - create_yolov11_input_tensors, - create_yolov11_model_parameters, -) -from models.experimental.functional_yolov11.reference.yolov11 import PSABlock as torch_psa_block -from models.experimental.functional_yolov11.tt.ttnn_yolov11 import PSABlock as ttnn_psa_block - - -@pytest.mark.parametrize( - "in_channel, out_channel, kernel, stride, padding, dilation, groups,fwd_input_shape", - [ - ( - [128, 128, 128, 128, 256], - [256, 128, 128, 256, 128], - [1, 1, 3, 1, 1], - [1, 1, 1, 1, 1], - [0, 0, 1, 0, 0], - [1, 1, 1, 1, 1], - [1, 1, 128, 1, 1], - [1, 128, 7, 7], - ), - ], -) -@pytest.mark.parametrize("device_params", [{"l1_small_size": 79104}], indirect=True) -def test_yolo_v11_psa_block( - device, - use_program_cache, - reset_seeds, - in_channel, - out_channel, - kernel, - stride, - padding, - dilation, - groups, - fwd_input_shape, -): - torch_module = torch_psa_block(in_channel, out_channel, kernel, stride, padding, dilation, groups) - torch_module.eval() - torch_input, ttnn_input = create_yolov11_input_tensors( - device, - batch=fwd_input_shape[0], - input_channels=fwd_input_shape[1], - input_height=fwd_input_shape[2], - input_width=fwd_input_shape[3], - ) - ttnn_input = 
ttnn.to_device(ttnn_input, device=device) - ttnn_input = ttnn.to_layout(ttnn_input, layout=ttnn.TILE_LAYOUT, memory_config=ttnn.L1_MEMORY_CONFIG) - torch_output = torch_module(torch_input) - parameters = create_yolov11_model_parameters(torch_module, torch_input, device=device) - ttnn_module = ttnn_psa_block(device=device, parameter=parameters.conv_args, conv_pt=parameters) - ttnn_output = ttnn_module(x=ttnn_input, device=device) - ttnn_output = ttnn.to_torch(ttnn_output) - ttnn_output = ttnn_output.permute(0, 3, 1, 2) - ttnn_output = ttnn_output.reshape(torch_output.shape) - assert_with_pcc(torch_output, ttnn_output, 0.99999) diff --git a/tests/ttnn/integration_tests/yolov11/test_ttnn_sppf.py b/tests/ttnn/integration_tests/yolov11/test_ttnn_sppf.py deleted file mode 100644 index d215363fcc5..00000000000 --- a/tests/ttnn/integration_tests/yolov11/test_ttnn_sppf.py +++ /dev/null @@ -1,54 +0,0 @@ -# SPDX-FileCopyrightText: © 2025 Tenstorrent Inc. -# SPDX-License-Identifier: Apache-2.0 - -import pytest -import ttnn -import torch -from tests.ttnn.utils_for_testing import assert_with_pcc -from models.experimental.functional_yolov11.tt.model_preprocessing import ( - create_yolov11_input_tensors, - create_yolov11_model_parameters, -) -from models.experimental.functional_yolov11.reference.yolov11 import SPPF as torch_sppf -from models.experimental.functional_yolov11.tt.ttnn_yolov11 import SPPF as ttnn_sppf - - -@pytest.mark.parametrize( - "in_channel, out_channel, kernel, stride, padding, dilation, groups,fwd_input_shape", - [ - ([256, 512], [128, 256], [1, 1], [1, 1], [0, 0], [1, 1], [1, 1], [1, 256, 7, 7]), - ], -) -@pytest.mark.parametrize("device_params", [{"l1_small_size": 79104}], indirect=True) -def test_yolo_v11_sppf( - device, - use_program_cache, - reset_seeds, - in_channel, - out_channel, - kernel, - stride, - padding, - dilation, - groups, - fwd_input_shape, -): - torch_module = torch_sppf(in_channel, out_channel, kernel, stride, padding, dilation, groups) - 
torch_module.eval() - torch_input, ttnn_input = create_yolov11_input_tensors( - device, - batch=fwd_input_shape[0], - input_channels=fwd_input_shape[1], - input_height=fwd_input_shape[2], - input_width=fwd_input_shape[3], - ) - ttnn_input = ttnn.to_device(ttnn_input, device=device) - ttnn_input = ttnn.to_layout(ttnn_input, layout=ttnn.TILE_LAYOUT, memory_config=ttnn.L1_MEMORY_CONFIG) - torch_output = torch_module(torch_input) - parameters = create_yolov11_model_parameters(torch_module, torch_input, device=device) - ttnn_module = ttnn_sppf(device=device, parameter=parameters.conv_args, conv_pt=parameters) - ttnn_output = ttnn_module(x=ttnn_input, device=device) - ttnn_output = ttnn.to_torch(ttnn_output) - ttnn_output = ttnn_output.permute(0, 3, 1, 2) - ttnn_output = ttnn_output.reshape(torch_output.shape) - assert_with_pcc(torch_output, ttnn_output, 0.99999) diff --git a/tests/ttnn/integration_tests/yolov11/test_ttnn_yolo_v11.py b/tests/ttnn/integration_tests/yolov11/test_ttnn_yolo_v11.py new file mode 100644 index 00000000000..b3ee0120290 --- /dev/null +++ b/tests/ttnn/integration_tests/yolov11/test_ttnn_yolo_v11.py @@ -0,0 +1,776 @@ +# SPDX-FileCopyrightText: © 2025 Tenstorrent Inc. 
+ +# SPDX-License-Identifier: Apache-2.0 + +from pathlib import Path +import os +import pytest +import sys +import ttnn +import torch.nn as nn +import torch +from tests.ttnn.utils_for_testing import assert_with_pcc +from models.experimental.functional_yolov11.tt.model_preprocessing import ( + create_yolov11_input_tensors, + create_yolov11_model_parameters, + create_yolov11_model_parameters_detect, +) +from models.experimental.functional_yolov11.reference.yolov11 import ( + Attention as torch_attention, + Bottleneck as torch_bottleneck, + C2PSA as torch_c2psa_block, + C3k as torch_c3k, + C3k2 as torch_c3k2, + PSABlock as torch_psa_block, + SPPF as torch_sppf, + Detect as torch_detect, +) +from models.experimental.functional_yolov11.tt.ttnn_yolov11 import ( + Attention as ttnn_attention, + Bottleneck as ttnn_bottleneck, + C2PSA as ttnn_c2psa_block, + C3K as ttnn_c3k, + C3k2 as ttnn_c3k2, + PSABlock as ttnn_psa_block, + SPPF as ttnn_sppf, + Detect as ttnn_detect, +) +from models.utility_functions import skip_for_grayskull + +from models.experimental.functional_yolov11.reference import yolov11 +from models.experimental.functional_yolov11.tt import ttnn_yolov11 + + +@pytest.mark.parametrize( + "in_channel, out_channel, kernel, stride, padding, dilation, groups,fwd_input_shape", + [ + ([128, 128, 128], [256, 128, 128], [1, 1, 3], [1, 1, 1], [0, 0, 1], [1, 1, 1], [1, 1, 128], [1, 128, 7, 7]), + ], +) +@skip_for_grayskull() +@pytest.mark.parametrize("device_params", [{"l1_small_size": 79104}], indirect=True) +def test_yolo_v11_attention( + device, + use_program_cache, + reset_seeds, + in_channel, + out_channel, + kernel, + stride, + padding, + dilation, + groups, + fwd_input_shape, +): + torch_module = torch_attention(in_channel, out_channel, kernel, stride, padding, dilation, groups) + torch_module.eval() + torch_input, ttnn_input = create_yolov11_input_tensors( + device, + batch=fwd_input_shape[0], + input_channels=fwd_input_shape[1], + input_height=fwd_input_shape[2], + 
input_width=fwd_input_shape[3], + ) + ttnn_input = ttnn.to_device(ttnn_input, device=device) + ttnn_input = ttnn.to_layout(ttnn_input, layout=ttnn.TILE_LAYOUT, memory_config=ttnn.L1_MEMORY_CONFIG) + torch_output = torch_module(torch_input) + parameters = create_yolov11_model_parameters(torch_module, torch_input, device=device) + ttnn_module = ttnn_attention(device=device, parameter=parameters.conv_args, conv_pt=parameters) + ttnn_output = ttnn_module(x=ttnn_input, device=device) + ttnn_output = ttnn.to_torch(ttnn_output) + ttnn_output = ttnn_output.permute(0, 3, 1, 2) + ttnn_output = ttnn_output.reshape(torch_output.shape) + assert_with_pcc(torch_output, ttnn_output, 0.99) + + +@skip_for_grayskull() +@pytest.mark.parametrize( + "in_channel, out_channel, kernel, stride, padding, dilation, groups,fwd_input_shape", + [ + ([16, 8], [8, 16], [3, 3], [1, 1], [1, 1], [1, 1], [1, 1], [1, 16, 56, 56]), # 1 + ([32, 16], [16, 32], [3, 3], [1, 1], [1, 1], [1, 1], [1, 1], [1, 32, 38, 28]), # 2 + ([32, 32], [32, 32], [3, 3], [1, 1], [1, 1], [1, 1], [1, 1], [1, 32, 14, 14]), # 3 + ([64, 64], [64, 64], [3, 3], [1, 1], [1, 1], [1, 1], [1, 1], [1, 64, 7, 7]), + ([64, 32], [32, 64], [3, 3], [1, 1], [1, 1], [1, 1], [1, 1], [1, 64, 14, 14]), + ([32, 16], [16, 32], [3, 3], [1, 1], [1, 1], [1, 1], [1, 1], [1, 32, 28, 28]), + ([64, 32], [32, 64], [3, 3], [1, 1], [1, 1], [1, 1], [1, 1], [1, 64, 14, 14]), + ([64, 64], [64, 64], [3, 3], [1, 1], [1, 1], [1, 1], [1, 1], [1, 64, 7, 7]), + ], +) +@pytest.mark.parametrize("device_params", [{"l1_small_size": 79104}], indirect=True) +def test_yolo_v11_bottleneck( + device, + use_program_cache, + reset_seeds, + in_channel, + out_channel, + kernel, + stride, + padding, + dilation, + groups, + fwd_input_shape, +): + torch_module = torch_bottleneck(in_channel, out_channel, kernel, stride, padding, dilation, groups) + torch_module.eval() + torch_input, ttnn_input = create_yolov11_input_tensors( + device, + batch=fwd_input_shape[0], + 
input_channels=fwd_input_shape[1], + input_height=fwd_input_shape[2], + input_width=fwd_input_shape[3], + ) + ttnn_input = ttnn.to_device(ttnn_input, device=device) + ttnn_input = ttnn.to_layout(ttnn_input, layout=ttnn.TILE_LAYOUT, memory_config=ttnn.L1_MEMORY_CONFIG) + torch_output = torch_module(torch_input) + parameters = create_yolov11_model_parameters(torch_module, torch_input, device=device) + ttnn_module = ttnn_bottleneck(device=device, parameter=parameters.conv_args, conv_pt=parameters) + ttnn_output = ttnn_module(x=ttnn_input, device=device) + ttnn_output = ttnn.to_torch(ttnn_output) + ttnn_output = ttnn_output.permute(0, 3, 1, 2) + ttnn_output = ttnn_output.reshape(torch_output.shape) + assert_with_pcc(torch_output, ttnn_output, 0.99) + + +@skip_for_grayskull() +@pytest.mark.parametrize( + "in_channel, out_channel, kernel, stride, padding, dilation, groups,fwd_input_shape", + [ + ( + [256, 256, 128, 128, 128, 128, 256], + [256, 256, 256, 128, 128, 256, 128], + [1, 1, 1, 1, 3, 1, 1], + [1, 1, 1, 1, 1, 1, 1], + [0, 0, 0, 0, 1, 0, 0], + [1, 1, 1, 1, 1, 1, 1], + [1, 1, 1, 1, 128, 1, 1], + [1, 256, 7, 7], + ), + ], +) +@pytest.mark.parametrize("device_params", [{"l1_small_size": 79104}], indirect=True) +def test_yolo_v11_c2psa_block( + device, + use_program_cache, + reset_seeds, + in_channel, + out_channel, + kernel, + stride, + padding, + dilation, + groups, + fwd_input_shape, +): + torch_module = torch_c2psa_block(in_channel, out_channel, kernel, stride, padding, dilation, groups) + torch_module.eval() + torch_input, ttnn_input = create_yolov11_input_tensors( + device, + batch=fwd_input_shape[0], + input_channels=fwd_input_shape[1], + input_height=fwd_input_shape[2], + input_width=fwd_input_shape[3], + ) + ttnn_input = ttnn.to_device(ttnn_input, device=device) + ttnn_input = ttnn.to_layout(ttnn_input, layout=ttnn.TILE_LAYOUT, memory_config=ttnn.L1_MEMORY_CONFIG) + torch_output = torch_module(torch_input) + parameters = 
create_yolov11_model_parameters(torch_module, torch_input, device=device) + ttnn_module = ttnn_c2psa_block(device=device, parameter=parameters.conv_args, conv_pt=parameters) + ttnn_output = ttnn_module(x=ttnn_input, device=device) + ttnn_output = ttnn.to_torch(ttnn_output) + ttnn_output = ttnn_output.permute(0, 3, 1, 2) + ttnn_output = ttnn_output.reshape(torch_output.shape) + assert_with_pcc(torch_output, ttnn_output, 0.99) + + +@skip_for_grayskull() +@pytest.mark.parametrize( + "in_channel, out_channel, kernel, stride, padding, dilation, groups,fwd_input_shape", + [ + ( + [64, 64, 64, 32, 32, 32, 32], + [32, 32, 64, 32, 32, 32, 32], + [1, 1, 1, 3, 3, 3, 3], + [1, 1, 1, 1, 1, 1, 1], + [0, 0, 0, 1, 1, 1, 1], + [1, 1, 1, 1, 1, 1, 1], + [1, 1, 1, 1, 1, 1, 1], + [1, 64, 14, 14], + ), + ( + [128, 128, 128, 64, 64, 64, 64], + [64, 64, 128, 64, 64, 64, 64], + [1, 1, 1, 3, 3, 3, 3], + [1, 1, 1, 1, 1, 1, 1], + [0, 0, 0, 1, 1, 1, 1], + [1, 1, 1, 1, 1, 1, 1], + [1, 1, 1, 1, 1, 1, 1], + [1, 128, 7, 7], + ), + ( + [64, 64, 64, 32, 32, 32, 32], + [32, 32, 64, 32, 32, 32, 32], + [1, 1, 1, 3, 3, 3, 3], + [1, 1, 1, 1, 1, 1, 1], + [0, 0, 0, 1, 1, 1, 1], + [1, 1, 1, 1, 1, 1, 1], + [1, 1, 1, 1, 1, 1, 1], + [1, 64, 40, 40], + ), + ( + [128, 128, 128, 64, 64, 64, 64], + [64, 64, 128, 64, 64, 64, 64], + [1, 1, 1, 3, 3, 3, 3], + [1, 1, 1, 1, 1, 1, 1], + [0, 0, 0, 1, 1, 1, 1], + [1, 1, 1, 1, 1, 1, 1], + [1, 1, 1, 1, 1, 1, 1], + [1, 128, 20, 20], + ), + ], +) +@pytest.mark.parametrize("device_params", [{"l1_small_size": 79104}], indirect=True) +def test_yolo_v11_c3k( + device, + use_program_cache, + reset_seeds, + in_channel, + out_channel, + kernel, + stride, + padding, + dilation, + groups, + fwd_input_shape, +): + torch_module = torch_c3k(in_channel, out_channel, kernel, stride, padding, dilation, groups) + torch_module.eval() + torch_input, ttnn_input = create_yolov11_input_tensors( + device, + batch=fwd_input_shape[0], + input_channels=fwd_input_shape[1], + 
input_height=fwd_input_shape[2], + input_width=fwd_input_shape[3], + ) + ttnn_input = ttnn.to_device(ttnn_input, device=device) + ttnn_input = ttnn.to_layout(ttnn_input, layout=ttnn.TILE_LAYOUT, memory_config=ttnn.L1_MEMORY_CONFIG) + torch_output = torch_module(torch_input) + parameters = create_yolov11_model_parameters(torch_module, torch_input, device=device) + ttnn_module = ttnn_c3k(device=device, parameter=parameters.conv_args, conv_pt=parameters) + ttnn_output = ttnn_module(x=ttnn_input, device=device) + ttnn_output = ttnn.to_torch(ttnn_output) + ttnn_output = ttnn_output.permute(0, 3, 1, 2) + ttnn_output = ttnn_output.reshape(torch_output.shape) + assert_with_pcc(torch_output, ttnn_output, 0.99) + + +@skip_for_grayskull() +@pytest.mark.parametrize( + "in_channel, out_channel, kernel, stride, padding, dilation, groups,is_bk_enabled,fwd_input_shape", + [ + # 224 + ( + [32, 48, 16, 8], + [32, 64, 8, 16], + [1, 1, 3, 3], + [1, 1, 1, 1], + [0, 0, 1, 1], + [1, 1, 1, 1], + [1, 1, 1, 1], + True, + [1, 32, 56, 56], + ), + ( + [64, 96, 32, 16], + [64, 128, 16, 32], + [1, 1, 3, 3], + [1, 1, 1, 1], + [0, 0, 1, 1], + [1, 1, 1, 1], + [1, 1, 1, 1], + True, + [1, 64, 28, 28], + ), + ( + [128, 192, 64, 64, 64, 32, 32, 32, 32], + [128, 128, 32, 32, 64, 32, 32, 32, 32], + [1, 1, 1, 1, 1, 3, 3, 3, 3], + [1, 1, 1, 1, 1, 1, 1, 1, 1], + [0, 0, 0, 0, 0, 1, 1, 1, 1], + [1, 1, 1, 1, 1, 1, 1, 1, 1], + [1, 1, 1, 1, 1, 1, 1, 1, 1], + False, + [1, 128, 14, 14], + ), + ( + [256, 384, 128, 128, 128, 64, 64, 64, 64], + [256, 256, 64, 64, 128, 64, 64, 64, 64], + [1, 1, 1, 1, 1, 3, 3, 3, 3], + [1, 1, 1, 1, 1, 1, 1, 1, 1], + [0, 0, 0, 0, 0, 1, 1, 1, 1], + [1, 1, 1, 1, 1, 1, 1, 1, 1], + [1, 1, 1, 1, 1, 1, 1, 1, 1], + False, + [1, 256, 7, 7], + ), + ( + [384, 192, 64, 32], + [128, 128, 32, 64], + [1, 1, 3, 3], + [1, 1, 1, 1], + [0, 0, 1, 1], + [1, 1, 1, 1], + [1, 1, 1, 1], + True, + [1, 384, 14, 14], + ), + ( + [256, 96, 32, 16], + [64, 64, 16, 32], + [1, 1, 3, 3], + [1, 1, 1, 1], + [0, 0, 1, 1], 
+ [1, 1, 1, 1], + [1, 1, 1, 1], + True, + [1, 256, 28, 28], + ), + ( + [192, 192, 64, 32], + [128, 128, 32, 64], + [1, 1, 3, 3], + [1, 1, 1, 1], + [0, 0, 1, 1], + [1, 1, 1, 1], + [1, 1, 1, 1], + True, + [1, 192, 14, 14], + ), + ( + [384, 384, 128, 128, 128, 64, 64, 64, 64], + [256, 256, 64, 64, 128, 64, 64, 64, 64], + [1, 1, 1, 1, 1, 3, 3, 3, 3], + [1, 1, 1, 1, 1, 1, 1, 1, 1], + [0, 0, 0, 0, 0, 1, 1, 1, 1], + [1, 1, 1, 1, 1, 1, 1, 1, 1], + [1, 1, 1, 1, 1, 1, 1, 1, 1], + False, + [1, 384, 7, 7], + ), + # 640 + ( + [32, 48, 16, 8], + [32, 64, 8, 16], + [1, 1, 3, 3], + [1, 1, 1, 1], + [0, 0, 1, 1], + [1, 1, 1, 1], + [1, 1, 1, 1], + True, + [1, 32, 160, 160], + ), + ( + [64, 96, 32, 16], + [64, 128, 16, 32], + [1, 1, 3, 3], + [1, 1, 1, 1], + [0, 0, 1, 1], + [1, 1, 1, 1], + [1, 1, 1, 1], + True, + [1, 64, 80, 80], + ), + ( + [128, 192, 64, 64, 64, 32, 32, 32, 32], + [128, 128, 32, 32, 64, 32, 32, 32, 32], + [1, 1, 1, 1, 1, 3, 3, 3, 3], + [1, 1, 1, 1, 1, 1, 1, 1, 1], + [0, 0, 0, 0, 0, 1, 1, 1, 1], + [1, 1, 1, 1, 1, 1, 1, 1, 1], + [1, 1, 1, 1, 1, 1, 1, 1, 1], + False, + [1, 128, 40, 40], + ), + ( + [256, 384, 128, 128, 128, 64, 64, 64, 64], + [256, 256, 64, 64, 128, 64, 64, 64, 64], + [1, 1, 1, 1, 1, 3, 3, 3, 3], + [1, 1, 1, 1, 1, 1, 1, 1, 1], + [0, 0, 0, 0, 0, 1, 1, 1, 1], + [1, 1, 1, 1, 1, 1, 1, 1, 1], + [1, 1, 1, 1, 1, 1, 1, 1, 1], + False, + [1, 256, 20, 20], + ), + ( + [384, 192, 64, 32], + [128, 128, 32, 64], + [1, 1, 3, 3], + [1, 1, 1, 1], + [0, 0, 1, 1], + [1, 1, 1, 1], + [1, 1, 1, 1], + True, + [1, 384, 40, 40], + ), + ( + [256, 96, 32, 16], + [64, 64, 16, 32], + [1, 1, 3, 3], + [1, 1, 1, 1], + [0, 0, 1, 1], + [1, 1, 1, 1], + [1, 1, 1, 1], + True, + [1, 256, 80, 80], + ), + ( + [192, 192, 64, 32], + [128, 128, 32, 64], + [1, 1, 3, 3], + [1, 1, 1, 1], + [0, 0, 1, 1], + [1, 1, 1, 1], + [1, 1, 1, 1], + True, + [1, 192, 40, 40], + ), + ( + [384, 384, 128, 128, 128, 64, 64, 64, 64], + [256, 256, 64, 64, 128, 64, 64, 64, 64], + [1, 1, 1, 1, 1, 3, 3, 3, 3], + [1, 1, 1, 
1, 1, 1, 1, 1, 1], + [0, 0, 0, 0, 0, 1, 1, 1, 1], + [1, 1, 1, 1, 1, 1, 1, 1, 1], + [1, 1, 1, 1, 1, 1, 1, 1, 1], + False, + [1, 384, 20, 20], + ), + ], +) +@pytest.mark.parametrize("device_params", [{"l1_small_size": 79104}], indirect=True) +def test_yolo_v11_c3k2( + device, + use_program_cache, + reset_seeds, + in_channel, + out_channel, + kernel, + stride, + padding, + dilation, + groups, + is_bk_enabled, + fwd_input_shape, +): + torch_module = torch_c3k2(in_channel, out_channel, kernel, stride, padding, dilation, groups, is_bk_enabled) + torch_module.eval() + torch_input, ttnn_input = create_yolov11_input_tensors( + device, + batch=fwd_input_shape[0], + input_channels=fwd_input_shape[1], + input_height=fwd_input_shape[2], + input_width=fwd_input_shape[3], + ) + ttnn_input = ttnn.to_device(ttnn_input, device=device, memory_config=ttnn.L1_MEMORY_CONFIG) + ttnn_input = ttnn.to_layout(ttnn_input, layout=ttnn.TILE_LAYOUT) + torch_output = torch_module(torch_input) + parameters = create_yolov11_model_parameters(torch_module, torch_input, device=device) + ttnn_module = ttnn_c3k2( + device=device, parameter=parameters.conv_args, conv_pt=parameters, is_bk_enabled=is_bk_enabled + ) + ttnn_output = ttnn_module(x=ttnn_input, device=device) + ttnn_output = ttnn.to_torch(ttnn_output) + ttnn_output = ttnn_output.permute(0, 3, 1, 2) + ttnn_output = ttnn_output.reshape(torch_output.shape) + assert_with_pcc(torch_output, ttnn_output, 0.99) + + +@skip_for_grayskull() +@pytest.mark.parametrize( + "in_channel, out_channel, kernel, stride, padding, dilation, groups,fwd_input_shape", + [ + ( + [128, 128, 128, 128, 256], + [256, 128, 128, 256, 128], + [1, 1, 3, 1, 1], + [1, 1, 1, 1, 1], + [0, 0, 1, 0, 0], + [1, 1, 1, 1, 1], + [1, 1, 128, 1, 1], + [1, 128, 7, 7], + ), + ], +) +@pytest.mark.parametrize("device_params", [{"l1_small_size": 79104}], indirect=True) +def test_yolo_v11_psa_block( + device, + use_program_cache, + reset_seeds, + in_channel, + out_channel, + kernel, + stride, + 
padding, + dilation, + groups, + fwd_input_shape, +): + torch_module = torch_psa_block(in_channel, out_channel, kernel, stride, padding, dilation, groups) + torch_module.eval() + torch_input, ttnn_input = create_yolov11_input_tensors( + device, + batch=fwd_input_shape[0], + input_channels=fwd_input_shape[1], + input_height=fwd_input_shape[2], + input_width=fwd_input_shape[3], + ) + ttnn_input = ttnn.to_device(ttnn_input, device=device) + ttnn_input = ttnn.to_layout(ttnn_input, layout=ttnn.TILE_LAYOUT, memory_config=ttnn.L1_MEMORY_CONFIG) + torch_output = torch_module(torch_input) + parameters = create_yolov11_model_parameters(torch_module, torch_input, device=device) + ttnn_module = ttnn_psa_block(device=device, parameter=parameters.conv_args, conv_pt=parameters) + ttnn_output = ttnn_module(x=ttnn_input, device=device) + ttnn_output = ttnn.to_torch(ttnn_output) + ttnn_output = ttnn_output.permute(0, 3, 1, 2) + ttnn_output = ttnn_output.reshape(torch_output.shape) + assert_with_pcc(torch_output, ttnn_output, 0.99) + + +@skip_for_grayskull() +@pytest.mark.parametrize( + "in_channel, out_channel, kernel, stride, padding, dilation, groups,fwd_input_shape", + [ + ([256, 512], [128, 256], [1, 1], [1, 1], [0, 0], [1, 1], [1, 1], [1, 256, 20, 20]), + ], +) +@pytest.mark.parametrize("device_params", [{"l1_small_size": 79104}], indirect=True) +def test_yolo_v11_sppf( + device, + use_program_cache, + reset_seeds, + in_channel, + out_channel, + kernel, + stride, + padding, + dilation, + groups, + fwd_input_shape, +): + torch_module = torch_sppf(in_channel, out_channel, kernel, stride, padding, dilation, groups) + torch_module.eval() + torch_input, ttnn_input = create_yolov11_input_tensors( + device, + batch=fwd_input_shape[0], + input_channels=fwd_input_shape[1], + input_height=fwd_input_shape[2], + input_width=fwd_input_shape[3], + ) + ttnn_input = ttnn.to_device(ttnn_input, device=device) + ttnn_input = ttnn.to_layout(ttnn_input, layout=ttnn.TILE_LAYOUT, 
memory_config=ttnn.L1_MEMORY_CONFIG) + torch_output = torch_module(torch_input) + parameters = create_yolov11_model_parameters(torch_module, torch_input, device=device) + ttnn_module = ttnn_sppf(device=device, parameter=parameters.conv_args, conv_pt=parameters) + ttnn_output = ttnn_module(x=ttnn_input, device=device) + ttnn_output = ttnn.to_torch(ttnn_output) + ttnn_output = ttnn_output.permute(0, 3, 1, 2) + ttnn_output = ttnn_output.reshape(torch_output.shape) + assert_with_pcc(torch_output, ttnn_output, 0.99) + + +try: + sys.modules["ultralytics"] = yolov11 + sys.modules["ultralytics.nn.tasks"] = yolov11 + sys.modules["ultralytics.nn.modules.conv"] = yolov11 + sys.modules["ultralytics.nn.modules.block"] = yolov11 + sys.modules["ultralytics.nn.modules.head"] = yolov11 + +except KeyError: + print("models.experimental.functional_yolov11.reference.yolov11 not found.") + + +class Ensemble(nn.ModuleList): + def __init__(self): + super(Ensemble, self).__init__() + + def forward(self, x, augment=False): + y = [] + for module in self: + y.append(module(x, augment)[0]) + y = torch.cat(y, 1) + return y, None + + +def attempt_download(file, repo="ultralytics/assets"): + tests = Path(__file__).parent.parent / "yolov11" + file_path = tests / Path(str(file).strip().replace("'", "").lower()) + + if not file_path.exists(): + name = "yolo11n.pt" # file_path.name + msg = f"{file_path} missing, try downloading from https://github.com/{repo}/releases/" + + try: + url = f"https://github.com/{repo}/releases/download/v8.3.0/{name}" + + print(f"Downloading {url} to {file_path}...") + torch.hub.download_url_to_file(url, file_path) + + # Validate the file + assert file_path.exists() and file_path.stat().st_size > 1e6, f"Download failed for {name}" + + except Exception as e: + print(f"Error downloading from GitHub: {e}. 
Trying secondary source...") + + url = f"https://storage.googleapis.com/{repo}/ckpt/{name}" + print(f"Downloading {url} to {file_path}...") + os.system(f"curl -L {url} -o {file_path}") + + if not file_path.exists() or file_path.stat().st_size < 1e6: + file_path.unlink(missing_ok=True) + print(f"ERROR: Download failure for {msg}") + else: + print(f"Download succeeded from secondary source!") + return file_path + + +# Function to load weights into the model +def attempt_load(weights, map_location=None): + model = Ensemble() + + # Iterate through the weights and load them + for w in weights if isinstance(weights, list) else [weights]: + weight_path = attempt_download(w) + ckpt = torch.load(weight_path, map_location=map_location) + model.append(ckpt["ema" if ckpt.get("ema") else "model"].float().eval()) + for m in model.modules(): + if isinstance(m, (nn.Hardswish, nn.LeakyReLU, nn.ReLU, nn.ReLU6, nn.SiLU)): + m.inplace = True + elif isinstance(m, nn.Upsample): + m.recompute_scale_factor = None + + if len(model) == 1: + return model[-1] + else: + for k in ["names", "stride"]: + setattr(model, k, getattr(model[-1], k)) + return model + + +@skip_for_grayskull() +@pytest.mark.parametrize( + "in_channel, out_channel, kernel, stride, padding, dilation, groups,fwd_input_shape", + [ + ( + [64, 64, 64, 128, 64, 64, 256, 64, 64, 64, 64, 80, 80, 80, 128, 128, 80, 80, 80, 256, 256, 80, 80, 80, 16], + [64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 80, 80, 80, 80, 128, 80, 80, 80, 80, 256, 80, 80, 80, 80, 1], + [3, 3, 1, 3, 3, 1, 3, 3, 1, 3, 1, 3, 1, 1, 3, 1, 3, 1, 1, 3, 1, 3, 1, 1, 1], + [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], + [1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 0, 1, 0, 0, 1, 0, 1, 0, 0, 1, 0, 1, 0, 0, 0], + [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], + [1, 1, 1, 1, 1, 1, 1, 1, 1, 64, 1, 80, 1, 1, 128, 1, 80, 1, 1, 256, 1, 80, 1, 1, 1], + [[1, 64, 28, 28], [1, 128, 14, 14], [1, 256, 7, 7]], + ), + ], +) 
+@pytest.mark.parametrize("device_params", [{"l1_small_size": 79104}], indirect=True) +def test_yolo_v11_detect( + device, + use_program_cache, + reset_seeds, + in_channel, + out_channel, + kernel, + stride, + padding, + dilation, + groups, + fwd_input_shape, +): + torch_module = torch_detect(in_channel, out_channel, kernel, stride, padding, dilation, groups) + torch_module.eval() + torch_input_1, ttnn_input_1 = create_yolov11_input_tensors( + device, + batch=fwd_input_shape[0][0], + input_channels=fwd_input_shape[0][1], + input_height=fwd_input_shape[0][2], + input_width=fwd_input_shape[0][3], + ) + torch_input_2, ttnn_input_2 = create_yolov11_input_tensors( + device, + batch=fwd_input_shape[1][0], + input_channels=fwd_input_shape[1][1], + input_height=fwd_input_shape[1][2], + input_width=fwd_input_shape[1][3], + ) + torch_input_3, ttnn_input_3 = create_yolov11_input_tensors( + device, + batch=fwd_input_shape[2][0], + input_channels=fwd_input_shape[2][1], + input_height=fwd_input_shape[2][2], + input_width=fwd_input_shape[2][3], + ) + ttnn_input_1 = ttnn.to_device(ttnn_input_1, device=device) + ttnn_input_1 = ttnn.to_layout(ttnn_input_1, layout=ttnn.TILE_LAYOUT, memory_config=ttnn.L1_MEMORY_CONFIG) + ttnn_input_2 = ttnn.to_device(ttnn_input_2, device=device) + ttnn_input_2 = ttnn.to_layout(ttnn_input_2, layout=ttnn.TILE_LAYOUT, memory_config=ttnn.L1_MEMORY_CONFIG) + ttnn_input_3 = ttnn.to_device(ttnn_input_3, device=device) + ttnn_input_3 = ttnn.to_layout(ttnn_input_3, layout=ttnn.TILE_LAYOUT, memory_config=ttnn.L1_MEMORY_CONFIG) + torch_output = torch_module(torch_input_1, torch_input_2, torch_input_3) + parameters = create_yolov11_model_parameters_detect( + torch_module, torch_input_1, torch_input_2, torch_input_3, device=device + ) + ttnn_module = ttnn_detect(device=device, parameter=parameters.model, conv_pt=parameters) + + ttnn_output = ttnn_module(y1=ttnn_input_1, y2=ttnn_input_2, y3=ttnn_input_3, device=device) + ttnn_output = ttnn.to_torch(ttnn_output) + 
ttnn_output = ttnn_output.reshape(torch_output.shape) + assert_with_pcc(torch_output, ttnn_output, 0.99) + + +@skip_for_grayskull() +@pytest.mark.parametrize( + "resolution", + [ + ([1, 3, 224, 224]), + ([1, 3, 640, 640]), + ], +) +@pytest.mark.parametrize( + "use_pretrained_weight", + [ + False, + # True # uncomment to run the model for real weights + ], + ids=[ + "pretrained_weight_false", + # "pretrained_weight_true", # uncomment to run the model for real weights + ], +) +@pytest.mark.parametrize("device_params", [{"l1_small_size": 79104}], indirect=True) +def test_yolov11(device, use_program_cache, reset_seeds, resolution, use_pretrained_weight): + torch_input, ttnn_input = create_yolov11_input_tensors( + device, batch=resolution[0], input_channels=resolution[1], input_height=resolution[2], input_width=resolution[3] + ) + if use_pretrained_weight: + torch_model = attempt_load("yolo11n.pt", map_location="cpu") + state_dict = torch_model.state_dict() + torch_model = yolov11.YoloV11() + ds_state_dict = {k: v for k, v in state_dict.items()} + new_state_dict = {} + for (name1, parameter1), (name2, parameter2) in zip(torch_model.state_dict().items(), ds_state_dict.items()): + if isinstance(parameter2, torch.FloatTensor): + new_state_dict[name1] = parameter2 + torch_model.load_state_dict(new_state_dict) + else: + torch_model = yolov11.YoloV11() + torch_model.eval() + + torch_output = torch_model(torch_input) + parameters = create_yolov11_model_parameters(torch_model, torch_input, device=device) + ttnn_model = ttnn_yolov11.YoloV11(device, parameters) + ttnn_output = ttnn_model(ttnn_input) + ttnn_output = ttnn.to_torch(ttnn_output) + + ttnn_output = ttnn_output.reshape(torch_output.shape) + assert_with_pcc(torch_output, ttnn_output, 0.99) diff --git a/tests/ttnn/integration_tests/yolov11/test_ttnn_yolov11.py b/tests/ttnn/integration_tests/yolov11/test_ttnn_yolov11.py deleted file mode 100644 index f694d9081b9..00000000000 --- 
a/tests/ttnn/integration_tests/yolov11/test_ttnn_yolov11.py +++ /dev/null @@ -1,89 +0,0 @@ -# SPDX-FileCopyrightText: © 2025 Tenstorrent Inc. - -# SPDX-License-Identifier: Apache-2.0 - -import pytest -import ttnn -import torch -from tests.ttnn.utils_for_testing import assert_with_pcc -import sys - -from models.experimental.functional_yolov11.reference import yolov11 - -from models.experimental.functional_yolov11.tt.model_preprocessing import ( - create_yolov11_input_tensors, - create_yolov11_model_parameters, -) -from models.experimental.functional_yolov11.tt import ttnn_yolov11 -import torch.nn as nn - -try: - sys.modules["ultralytics"] = yolov11 - sys.modules["ultralytics.nn.tasks"] = yolov11 - sys.modules["ultralytics.nn.modules.conv"] = yolov11 - sys.modules["ultralytics.nn.modules.block"] = yolov11 - sys.modules["ultralytics.nn.modules.head"] = yolov11 - -except KeyError: - print("models.experimental.functional_yolov11.reference.yolov11 not found.") - - -class Ensemble(nn.ModuleList): - def __init__(self): - super(Ensemble, self).__init__() - - def forward(self, x, augment=False): - y = [] - for module in self: - y.append(module(x, augment)[0]) - y = torch.cat(y, 1) - return y, None - - -def attempt_load(weights, map_location=None): - model = Ensemble() - for w in weights if isinstance(weights, list) else [weights]: - w = "models/experimental/functional_yolov11/reference/yolo11n.pt" - ckpt = torch.load(w, map_location=map_location) - model.append(ckpt["ema" if ckpt.get("ema") else "model"].float().eval()) - for m in model.modules(): - if type(m) in [nn.Hardswish, nn.LeakyReLU, nn.ReLU, nn.ReLU6, nn.SiLU]: - m.inplace = True - elif type(m) is nn.Upsample: - m.recompute_scale_factor = None - - if len(model) == 1: - return model[-1] - else: - for k in ["names", "stride"]: - setattr(model, k, getattr(model[-1], k)) - return model - - -@pytest.mark.parametrize("device_params", [{"l1_small_size": 79104}], indirect=True) -def test_yolov11(device, use_program_cache, 
reset_seeds): - torch_input, ttnn_input = create_yolov11_input_tensors(device, input_channels=3, input_height=640, input_width=640) - - torch_model = attempt_load("yolov11n.pt", map_location="cpu") - state_dict = torch_model.state_dict() - torch_model = yolov11.YoloV11() - ds_state_dict = {k: v for k, v in state_dict.items()} - new_state_dict = {} - for (name1, parameter1), (name2, parameter2) in zip(torch_model.state_dict().items(), ds_state_dict.items()): - if isinstance(parameter2, torch.FloatTensor): - new_state_dict[name1] = parameter2 - torch_model.load_state_dict(new_state_dict) - torch_model.eval() - - torch_output = torch_model(torch_input) - parameters = create_yolov11_model_parameters(torch_model, torch_input, device=device) - ttnn_model = ttnn_yolov11.YoloV11(device, parameters) - ttnn_output = ttnn_model(ttnn_input) - # l1 = torch.load("/home/ubuntu/venkatesh_yolov11/tt-metal/models/experimental/functional_yolov11/dumps/torch_out.pth") - # l1 = torch.load("/home/ubuntu/venkatesh_yolov11/tt-metal/models/experimental/functional_yolov11/dumps/tt_out.pth") - # assert_with_pcc(l1, l2, 0.99) - ttnn_output = ttnn.to_torch(ttnn_output) - # ttnn_output = ttnn_output.permute(0, 2, 1) - print(ttnn_output.shape, torch_output.shape) - ttnn_output = ttnn_output.reshape(torch_output.shape) - assert_with_pcc(torch_output, ttnn_output, 0.99999) diff --git a/tests/ttnn/unit_tests/operations/test_concat.py b/tests/ttnn/unit_tests/operations/test_concat.py index d8abf381500..397b6b74598 100644 --- a/tests/ttnn/unit_tests/operations/test_concat.py +++ b/tests/ttnn/unit_tests/operations/test_concat.py @@ -53,108 +53,112 @@ def test_concat(device, height, width, dim, async_mode): @pytest.mark.parametrize( - "inputs, output_shard_shape, shard_grid, strategy, layout, cache_mode,dtype", + "inputs, output_shard_shape, shard_grid, strategy, layout, cache_mode", ( - # ( - # [((1,1,49,128), (1,128)), ((1,1,49,128), (1,128)),((1,1,49,128), (1,128)),((1,1,49,128), (1,128))], - # 
(1,512), - # ttnn.CoreRangeSet({ttnn.CoreRange(ttnn.CoreCoord(0, 0), ttnn.CoreCoord(7,7))}), - # ttnn.ShardStrategy.HEIGHT, - # ttnn.ROW_MAJOR_LAYOUT, - # False, - # ttnn.bfloat16 - # ), - # ( - # [((1,1,400,128), (7,128)), ((1,1,400,128), (7,128)),((1,1,400,128), (7,128)),((1,1,400,128), (7,128))], - # (7,512), - # ttnn.CoreRangeSet({ttnn.CoreRange(ttnn.CoreCoord(0, 0), ttnn.CoreCoord(7,7))}), - # ttnn.ShardStrategy.HEIGHT, - # ttnn.ROW_MAJOR_LAYOUT, - # False, - # ttnn.bfloat16 - # ), - # ( # fp8 case #C3K - # [((1, 1, 49, 64), (1, 64)), ((1, 1, 49, 64), (1, 64))], - # (1, 128), - # ttnn.CoreRangeSet({ttnn.CoreRange(ttnn.CoreCoord(0, 0), ttnn.CoreCoord(7, 7))}), - # ttnn.ShardStrategy.HEIGHT, - # ttnn.TILE_LAYOUT, - # False, - # ttnn.bfloat8_b - # ), - # ( #C3K - # [((1, 1, 196, 32), (4, 32)), ((1, 1, 196, 32), (4, 32))], - # (4, 64), - # ttnn.CoreRangeSet({ttnn.CoreRange(ttnn.CoreCoord(0, 0), ttnn.CoreCoord(7, 7))}), - # ttnn.ShardStrategy.HEIGHT, - # ttnn.TILE_LAYOUT, - # False, - # ttnn.bfloat8_b - # ), - # c3k2 - # ( - # [((1, 1, 25600, 16), (400, 16)), ((1, 1, 25600, 16), (400, 16)), ((1, 1, 25600, 16), (400, 16))], - # (400, 48), - # ttnn.CoreRangeSet({ttnn.CoreRange(ttnn.CoreCoord(0, 0), ttnn.CoreCoord(7, 7))}), - # ttnn.ShardStrategy.HEIGHT, - # ttnn.ROW_MAJOR_LAYOUT, - # False, - # ttnn.bfloat16, - # ), - # ( - # [((1, 1, 25600, 16), (400, 16)), ((1, 1, 25600, 16), (400, 16)), ((1, 1, 25600, 16), (400, 16))], - # (400, 48), - # ttnn.CoreRangeSet({ttnn.CoreRange(ttnn.CoreCoord(0, 0), ttnn.CoreCoord(7, 7))}), - # ttnn.ShardStrategy.HEIGHT, - # ttnn.ROW_MAJOR_LAYOUT, - # False, - # ttnn.bfloat16, - # ), - # ( - # [((1, 1, 6400, 32), (102, 32)), ((1, 1, 6400, 32), (102, 32)), ((1, 1, 6400, 32), (102, 32))], - # (102, 96), - # ttnn.CoreRangeSet({ttnn.CoreRange(ttnn.CoreCoord(0, 0), ttnn.CoreCoord(7, 7))}), - # ttnn.ShardStrategy.HEIGHT, - # ttnn.ROW_MAJOR_LAYOUT, - # False, - # ttnn.bfloat16, - # ), - # ( - # [((1, 1, 1600, 64), (25, 64)), ((1, 1, 1600, 64), 
(25, 64)), ((1, 1, 1600, 64), (25, 64))], - # (25, 192), - # ttnn.CoreRangeSet({ttnn.CoreRange(ttnn.CoreCoord(0, 0), ttnn.CoreCoord(7, 7))}), - # ttnn.ShardStrategy.HEIGHT, - # ttnn.ROW_MAJOR_LAYOUT, - # False, - # ttnn.bfloat16, - # ), ( - [ - ((1, 1, 1600, 256), (25, 256)), - ((1, 1, 1600, 128), (25, 256)), - ], - (25, 384), - ttnn.CoreRangeSet({ttnn.CoreRange(ttnn.CoreCoord(0, 0), ttnn.CoreCoord(7, 7))}), + [((1, 1, 160, 32), (80, 32)), ((1, 1, 160, 32), (80, 32))], + (80, 64), + ttnn.CoreRangeSet({ttnn.CoreRange(ttnn.CoreCoord(0, 0), ttnn.CoreCoord(0, 1))}), ttnn.ShardStrategy.HEIGHT, ttnn.ROW_MAJOR_LAYOUT, False, - ttnn.bfloat16, ), - # ( - # [((1, 1, 6400, 128), (102, 128)), ((1, 1, 6400, 128), (102, 128)) ], - # (102, 256), - # ttnn.CoreRangeSet({ttnn.CoreRange(ttnn.CoreCoord(0, 0), ttnn.CoreCoord(7, 7))}), - # ttnn.ShardStrategy.HEIGHT, - # ttnn.ROW_MAJOR_LAYOUT, - # False, - # ttnn.bfloat16, - # ), + ( + [((1, 1, 160, 32), (80, 32)), ((1, 1, 160, 16), (80, 16))], + (80, 48), + ttnn.CoreRangeSet({ttnn.CoreRange(ttnn.CoreCoord(0, 0), ttnn.CoreCoord(0, 1))}), + ttnn.ShardStrategy.HEIGHT, + ttnn.ROW_MAJOR_LAYOUT, + False, + ), + ( + [((1, 1, 25600, 64), (512, 64)), ((1, 1, 25600, 64), (512, 64))], + (512, 128), + ttnn.CoreRangeSet( + { + ttnn.CoreRange(ttnn.CoreCoord(0, 0), ttnn.CoreCoord(7, 5)), + ttnn.CoreRange(ttnn.CoreCoord(0, 6), ttnn.CoreCoord(1, 6)), + } + ), + ttnn.ShardStrategy.HEIGHT, + ttnn.ROW_MAJOR_LAYOUT, + False, + ), + pytest.param( + [((1, 1, 25600, 64), (512, 64)), ((1, 1, 25600, 64), (512, 64))], + (512, 128), + ttnn.CoreRangeSet( + { + ttnn.CoreRange(ttnn.CoreCoord(0, 0), ttnn.CoreCoord(7, 5)), + ttnn.CoreRange(ttnn.CoreCoord(0, 6), ttnn.CoreCoord(1, 6)), + } + ), + ttnn.ShardStrategy.HEIGHT, + ttnn.ROW_MAJOR_LAYOUT, + True, + ), + ( + [((1, 1, 16, 16), (8, 16)), ((1, 1, 16, 16), (8, 16)), ((1, 1, 16, 16), (8, 16))], + (8, 48), + ttnn.CoreRangeSet({ttnn.CoreRange(ttnn.CoreCoord(0, 0), ttnn.CoreCoord(0, 1))}), + ttnn.ShardStrategy.HEIGHT, + 
ttnn.ROW_MAJOR_LAYOUT, + False, + ), + ( + [((1, 1, 16, 16), (8, 16)), ((1, 1, 16, 16), (8, 16)), ((1, 1, 16, 16), (8, 16))], + (8, 48), + ttnn.CoreRangeSet({ttnn.CoreRange(ttnn.CoreCoord(0, 0), ttnn.CoreCoord(0, 1))}), + ttnn.ShardStrategy.HEIGHT, + ttnn.ROW_MAJOR_LAYOUT, + True, + ), + ( + [((1, 1, 8, 64), (8, 16)), ((1, 1, 7, 64), (7, 16)), ((1, 1, 23, 64), (23, 16))], + (38, 16), + ttnn.CoreRangeSet({ttnn.CoreRange(ttnn.CoreCoord(0, 0), ttnn.CoreCoord(0, 3))}), + ttnn.ShardStrategy.WIDTH, + ttnn.ROW_MAJOR_LAYOUT, + False, + ), + ( + [((1, 1, 8, 64), (8, 16)), ((1, 1, 7, 64), (7, 16)), ((1, 1, 23, 64), (23, 16))], + (38, 16), + ttnn.CoreRangeSet({ttnn.CoreRange(ttnn.CoreCoord(0, 0), ttnn.CoreCoord(0, 3))}), + ttnn.ShardStrategy.WIDTH, + ttnn.ROW_MAJOR_LAYOUT, + True, + ), + ( + [((1, 1, 256, 96), (64, 96)), ((1, 1, 256, 64), (64, 64)), ((1, 1, 256, 32), (64, 32))], + (64, 192), + ttnn.CoreRangeSet( + { + ttnn.CoreRange(ttnn.CoreCoord(0, 0), ttnn.CoreCoord(0, 1)), + ttnn.CoreRange(ttnn.CoreCoord(1, 0), ttnn.CoreCoord(2, 0)), + } + ), + ttnn.ShardStrategy.HEIGHT, + ttnn.TILE_LAYOUT, + False, + ), + ( + [((1, 1, 32, 512), (32, 64)), ((1, 1, 64, 512), (64, 64)), ((1, 1, 96, 512), (96, 64))], + (192, 64), + ttnn.CoreRangeSet( + { + ttnn.CoreRange(ttnn.CoreCoord(0, 0), ttnn.CoreCoord(0, 3)), + ttnn.CoreRange(ttnn.CoreCoord(1, 0), ttnn.CoreCoord(2, 1)), + } + ), + ttnn.ShardStrategy.WIDTH, + ttnn.TILE_LAYOUT, + False, + ), ), ) @pytest.mark.parametrize("async_mode", [True, False], ids=["async_on", "async_off"]) -def test_sharded_concat( - device, inputs, output_shard_shape, shard_grid, strategy, layout, cache_mode, async_mode, dtype -): +def test_sharded_concat(device, inputs, output_shard_shape, shard_grid, strategy, layout, cache_mode, async_mode): device.enable_async(async_mode) if cache_mode: device.enable_program_cache() @@ -172,7 +176,7 @@ def _gen_inputs(input_specs): use_height_and_width_as_shard_shape=True, ) torch_input_tensor = torch.rand(shape, 
dtype=torch.bfloat16) - input_tensor = ttnn.from_torch(torch_input_tensor, dtype=dtype, layout=layout, device=device) + input_tensor = ttnn.from_torch(torch_input_tensor, layout=layout, device=device) input_tensor = ttnn.to_memory_config(input_tensor, input_sharded_memory_config) input_tensors.append((torch_input_tensor, input_tensor)) return input_tensors