
Support Int32 datatype #384

Draft · wants to merge 1 commit into main
Conversation

@brataTT (Contributor) commented Feb 28, 2025

Problem description

Follow-up to tt-mlir commit a1af497.

After tt-mlir commit a1af497 added support for the Int32 dtype, tt-torch tests fail because the runtime cannot convert the new type to a PyTorch dtype.

What's changed

Added conversion support from TT::Int32 to Pytorch::Int, cherry-picked from @jnie-TT's fix branch. A sketch of the kind of mapping involved follows below.
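For illustration, here is a minimal sketch of the kind of dtype mapping this change extends. The `DataType` enum, the helper name `tt_dtype_to_torch`, and the set of existing entries are assumptions for this example only; the actual tt-torch conversion code may be structured differently:

```python
import torch
from enum import Enum, auto

# Hypothetical stand-in for the tt-mlir runtime DataType enum.
class DataType(Enum):
    Float32 = auto()
    BFloat16 = auto()
    Int32 = auto()  # newly supported by tt-mlir commit a1af497

# Hypothetical converter illustrating the fix: each TT dtype maps to a torch dtype.
_TT_TO_TORCH = {
    DataType.Float32: torch.float32,
    DataType.BFloat16: torch.bfloat16,
    DataType.Int32: torch.int32,  # the mapping this PR adds (TT::Int32 -> Pytorch::Int)
}

def tt_dtype_to_torch(dtype: DataType) -> torch.dtype:
    # Without the Int32 entry above, this lookup fails for the new type,
    # which is the failure mode described in the problem statement.
    if dtype not in _TT_TO_TORCH:
        raise ValueError(f"Cannot convert TT dtype {dtype} to a torch dtype")
    return _TT_TO_TORCH[dtype]
```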

Checklist

  • [ ] New/existing tests provide coverage for changes
  • [ ] Wait for the referenced commit to be uplifted

@brataTT brataTT requested review from kmabeeTT and jnie-TT February 28, 2025 21:32
@codecov-commenter commented Feb 28, 2025

❌ 17 Tests Failed:

| Tests completed | Failed | Passed | Skipped |
| --------------- | ------ | ------ | ------- |
| 483             | 17     | 466    | 7       |
View the full list of 3 ❄️ flaky tests
tests.models.torchvision.test_torchvision_image_classification::test_torchvision_image_classification[full-eval-mobilenet_v2]

Flake rate in main: 28.57% (Passed 5 times, Failed 2 times)

Stack Traces | 11.6s run time
record_property = <function record_property.<locals>.append_property at 0x7fb6a9947740>
model_info = ('mobilenet_v2', 'MobileNet_V2_Weights'), mode = 'eval'
op_by_op = False

    @pytest.mark.parametrize(
        "model_info", model_info_list, ids=[info[0] for info in model_info_list]
    )
    @pytest.mark.parametrize("mode", ["train", "eval"])
    @pytest.mark.parametrize("op_by_op", [True, False], ids=["op_by_op", "full"])
    def test_torchvision_image_classification(record_property, model_info, mode, op_by_op):
        if mode == "train":
            pytest.skip()
    
        cc = CompilerConfig()
        cc.enable_consteval = True
        cc.consteval_parameters = True
        if op_by_op:
            cc.compile_depth = CompileDepth.EXECUTE_OP_BY_OP
    
        tester = ThisTester(
            model_info,
            mode,
            required_pcc=0.98,
            assert_pcc=True,
            assert_atol=False,
            compiler_config=cc,
            record_property_handle=record_property,
        )
>       results = tester.test_model()

.../models/torchvision/test_torchvision_image_classification.py:117: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
tests/utils.py:274: in test_model
    return self.test_model_eval(on_device)
.../venv/lib/python3.11.../torch/utils/_contextlib.py:116: in decorate_context
    return func(*args, **kwargs)
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = <test_torchvision_image_classification.ThisTester object at 0x7fb739f4e110>
on_device = True

    @torch.no_grad()
    def test_model_eval(self, on_device=True):
        model = (
            self.framework_model.eval()
            if hasattr(self.framework_model, "eval")
            else self.framework_model
        )
        golden = self.get_golden_outputs(model, self.inputs)
    
        if on_device == True:
            model = self.compile_model(model, self.compiler_config)
    
        outputs = self.run_model(model, self.inputs)
        assert type(outputs) == type(
            golden
        ), "Expecting the type of both calculated and golden to be identical. Whether that be a tensor, list, dictonary, etc."
    
        passed_pcc, passed_atol, msg, err_msg, pccs, atols = verify_against_golden(
            self._extract_outputs(golden),
            self._extract_outputs(outputs),
            self.required_pcc,
            self.required_atol,
            self.relative_atol,
        )
        self.record_tag_cache["pccs"] = pccs
        self.record_tag_cache["atols"] = atols
    
        if self.assert_pcc and self.assert_atol:
            if passed_pcc and passed_atol:
                print(msg)
            else:
                assert False, err_msg
        elif not self.assert_pcc and self.assert_atol:
            print("Ignoring PCC check\n")
            if passed_atol:
                print(msg)
            else:
                assert False, err_msg
        elif self.assert_pcc and not self.assert_atol:
            print("Ignoring ATOL check\n")
            if passed_pcc:
                print(msg)
            else:
>               assert False, err_msg
E               AssertionError: PCC of output 0: 0.0439, threshold: 0.98
E               ATOL of output 0: 7.0625, threshold: 0.01

tests/utils.py:263: AssertionError
tests.models.torchvision.test_torchvision_image_classification::test_torchvision_image_classification[full-eval-resnext50_32x4d]

Flake rate in main: 28.57% (Passed 5 times, Failed 2 times)

Stack Traces | 30.3s run time
record_property = <function record_property.<locals>.append_property at 0x7fb70c3859e0>
model_info = ('resnext50_32x4d', 'ResNeXt50_32X4D_Weights'), mode = 'eval'
op_by_op = False

    @pytest.mark.parametrize(
        "model_info", model_info_list, ids=[info[0] for info in model_info_list]
    )
    @pytest.mark.parametrize("mode", ["train", "eval"])
    @pytest.mark.parametrize("op_by_op", [True, False], ids=["op_by_op", "full"])
    def test_torchvision_image_classification(record_property, model_info, mode, op_by_op):
        if mode == "train":
            pytest.skip()
    
        cc = CompilerConfig()
        cc.enable_consteval = True
        cc.consteval_parameters = True
        if op_by_op:
            cc.compile_depth = CompileDepth.EXECUTE_OP_BY_OP
    
        tester = ThisTester(
            model_info,
            mode,
            required_pcc=0.98,
            assert_pcc=True,
            assert_atol=False,
            compiler_config=cc,
            record_property_handle=record_property,
        )
>       results = tester.test_model()

.../models/torchvision/test_torchvision_image_classification.py:117: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
tests/utils.py:274: in test_model
    return self.test_model_eval(on_device)
.../venv/lib/python3.11.../torch/utils/_contextlib.py:116: in decorate_context
    return func(*args, **kwargs)
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = <test_torchvision_image_classification.ThisTester object at 0x7fb739bcaad0>
on_device = True

    @torch.no_grad()
    def test_model_eval(self, on_device=True):
        model = (
            self.framework_model.eval()
            if hasattr(self.framework_model, "eval")
            else self.framework_model
        )
        golden = self.get_golden_outputs(model, self.inputs)
    
        if on_device == True:
            model = self.compile_model(model, self.compiler_config)
    
        outputs = self.run_model(model, self.inputs)
        assert type(outputs) == type(
            golden
        ), "Expecting the type of both calculated and golden to be identical. Whether that be a tensor, list, dictonary, etc."
    
        passed_pcc, passed_atol, msg, err_msg, pccs, atols = verify_against_golden(
            self._extract_outputs(golden),
            self._extract_outputs(outputs),
            self.required_pcc,
            self.required_atol,
            self.relative_atol,
        )
        self.record_tag_cache["pccs"] = pccs
        self.record_tag_cache["atols"] = atols
    
        if self.assert_pcc and self.assert_atol:
            if passed_pcc and passed_atol:
                print(msg)
            else:
                assert False, err_msg
        elif not self.assert_pcc and self.assert_atol:
            print("Ignoring PCC check\n")
            if passed_atol:
                print(msg)
            else:
                assert False, err_msg
        elif self.assert_pcc and not self.assert_atol:
            print("Ignoring ATOL check\n")
            if passed_pcc:
                print(msg)
            else:
>               assert False, err_msg
E               AssertionError: PCC of output 0: -0.0611, threshold: 0.98
E               ATOL of output 0: 768.0000, threshold: 0.01

tests/utils.py:263: AssertionError
tests.models.torchvision.test_torchvision_image_classification::test_torchvision_image_classification[full-eval-regnet_x_400mf]

Flake rate in main: 28.57% (Passed 5 times, Failed 2 times)

Stack Traces | 33.2s run time
record_property = <function record_property.<locals>.append_property at 0x7fb6aac43240>
model_info = ('regnet_x_400mf', 'RegNet_X_400MF_Weights'), mode = 'eval'
op_by_op = False

    @pytest.mark.parametrize(
        "model_info", model_info_list, ids=[info[0] for info in model_info_list]
    )
    @pytest.mark.parametrize("mode", ["train", "eval"])
    @pytest.mark.parametrize("op_by_op", [True, False], ids=["op_by_op", "full"])
    def test_torchvision_image_classification(record_property, model_info, mode, op_by_op):
        if mode == "train":
            pytest.skip()
    
        cc = CompilerConfig()
        cc.enable_consteval = True
        cc.consteval_parameters = True
        if op_by_op:
            cc.compile_depth = CompileDepth.EXECUTE_OP_BY_OP
    
        tester = ThisTester(
            model_info,
            mode,
            required_pcc=0.98,
            assert_pcc=True,
            assert_atol=False,
            compiler_config=cc,
            record_property_handle=record_property,
        )
>       results = tester.test_model()

.../models/torchvision/test_torchvision_image_classification.py:117: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
tests/utils.py:274: in test_model
    return self.test_model_eval(on_device)
.../venv/lib/python3.11.../torch/utils/_contextlib.py:116: in decorate_context
    return func(*args, **kwargs)
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = <test_torchvision_image_classification.ThisTester object at 0x7fb6a2192f10>
on_device = True

    @torch.no_grad()
    def test_model_eval(self, on_device=True):
        model = (
            self.framework_model.eval()
            if hasattr(self.framework_model, "eval")
            else self.framework_model
        )
        golden = self.get_golden_outputs(model, self.inputs)
    
        if on_device == True:
            model = self.compile_model(model, self.compiler_config)
    
        outputs = self.run_model(model, self.inputs)
        assert type(outputs) == type(
            golden
        ), "Expecting the type of both calculated and golden to be identical. Whether that be a tensor, list, dictonary, etc."
    
        passed_pcc, passed_atol, msg, err_msg, pccs, atols = verify_against_golden(
            self._extract_outputs(golden),
            self._extract_outputs(outputs),
            self.required_pcc,
            self.required_atol,
            self.relative_atol,
        )
        self.record_tag_cache["pccs"] = pccs
        self.record_tag_cache["atols"] = atols
    
        if self.assert_pcc and self.assert_atol:
            if passed_pcc and passed_atol:
                print(msg)
            else:
                assert False, err_msg
        elif not self.assert_pcc and self.assert_atol:
            print("Ignoring PCC check\n")
            if passed_atol:
                print(msg)
            else:
                assert False, err_msg
        elif self.assert_pcc and not self.assert_atol:
            print("Ignoring ATOL check\n")
            if passed_pcc:
                print(msg)
            else:
>               assert False, err_msg
E               AssertionError: PCC of output 0: 0.7462, threshold: 0.98
E               ATOL of output 0: 3.8438, threshold: 0.01

tests/utils.py:263: AssertionError
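For context on the PCC/ATOL numbers in the failures above, here is a minimal sketch of how a check like `verify_against_golden` can compute them for a single output tensor. This is an illustration under assumed semantics (PCC as the Pearson correlation of the flattened tensors, ATOL as the maximum absolute elementwise difference), not the repository's actual implementation:

```python
import torch

def check_output(golden: torch.Tensor, output: torch.Tensor,
                 required_pcc: float = 0.98, required_atol: float = 0.01):
    """Return (passed_pcc, passed_atol, pcc, atol) for one output tensor."""
    g = golden.flatten().to(torch.float32)
    o = output.flatten().to(torch.float32)
    # Pearson correlation coefficient between golden and calculated outputs.
    pcc = torch.corrcoef(torch.stack([g, o]))[0, 1].item()
    # Maximum absolute elementwise difference.
    atol = (g - o).abs().max().item()
    return pcc >= required_pcc, atol <= required_atol, pcc, atol
```

Under this reading, a PCC of 0.0439 against a 0.98 threshold (the mobilenet_v2 failure) means the computed outputs are essentially uncorrelated with the golden outputs, not just marginally off.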

