Squashed commit of the following:
commit 18db5586d1a45f65fd98ea1a21d5fb87db5d2dbf
Author: Lisa Ong <[email protected]>
Date:   Thu Mar 16 03:46:54 2023 +0000

    Merged PR 3160: [security] bump onnx to 1.13.0

    This resolves a high-severity Dependabot alert.
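
    A quick sanity check after upgrading (a minimal sketch; only the version pin comes from this PR):

    ```
    import onnx

    # requirements.txt below pins onnx==1.13.0; fail fast if an older wheel is installed
    assert onnx.__version__ == "1.13.0", f"expected onnx 1.13.0, found {onnx.__version__}"
    ```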

commit 07d16bf787bffa3be93dd7902a402e7e5e660596
Author: Mason Remy <[email protected]>
Date:   Thu Mar 16 02:17:51 2023 +0000

    Merged PR 3157: Dynamic split dim tests

    Dynamic split dim tests
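
    For intuition, a NumPy analogue of what the tests added in this PR exercise (an assumption based on the tests' own reference check, which reshapes the flat input): `_split_dimension(0, N)` splits a 1-D array of length M*N into an (M, N) view.

    ```
    import numpy as np

    M, N = 64, 16
    flat = np.arange(M * N, dtype=np.float32)   # dynamically sized 1-D input
    split = flat.reshape(M * N // N, N)         # analogue of Input._split_dimension(0, N)
    assert split[3, 5] == flat[3 * N + 5]       # row-major element correspondence
    ```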

commit 7c5b9a18adbba2ec10461118fb061365e34f5ed0
Author: Denny Sun <[email protected]>
Date:   Wed Mar 15 01:47:45 2023 +0000

    Merged PR 3158: Do not unroll the profiling ops when vectorization enabled

    When vectorization is enabled, the ops in the kernel get unrolled. For example, without this fix the timer added to the inner kernel ends up with 8 copies, which is clearly wrong.
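
    A plain-Python sketch of the issue (hypothetical names; this is not Accera IR): with an unroll factor of 8, every op inside the unrolled body is replicated 8 times, so a timer placed inside would fire 8 times per logical region. The fix keeps profiling ops out of the unrolled region.

    ```
    import time

    UNROLL = 8

    def profiled_kernel(data):
        start = time.perf_counter()         # enter-region op: must execute once, not 8x
        for i in range(0, len(data), UNROLL):
            for u in range(UNROLL):         # stands in for the unrolled/vectorized body
                data[i + u] *= 2.0          # profiling ops must not be replicated here
        return time.perf_counter() - start  # exit-region op: must execute once, not 8x

    print(f"{profiled_kernel([1.0] * 1024):.6f} s")
    ```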

commit df217f2e731c2609674da57662eaf1ed6b4a40b0
Author: Denny Sun <[email protected]>
Date:   Mon Mar 13 06:18:41 2023 +0000

    Merged PR 3153: Fix the lowering issue of the profiling ops

    With this fix, kernel-level profiling works end to end. Here is an example of how to use it:

    ```
            @tile_nest.iteration_logic
            def _tile_logic():
                # Bracket each region of interest with Enter/ExitProfileRegion
                EnterProfileRegion("pack_b_fn_outer")
                pack_b_fn(B, B_temp, j, k)
                ExitProfileRegion("pack_b_fn_outer")

                EnterProfileRegion("matmul_fn_outer")
                matmul_fn(A, B, C, B_temp, i, j, k)
                ExitProfileRegion("matmul_fn_outer")

                # Emit the timings accumulated so far for all regions
                PrintProfileResults()
    ```

    The timings printed out look like the following (each row shows the region name, the invocation count, and what appears to be the cumulative elapsed time):

    ```
    matmul_fn_outer 1       0.000100 ms
    pack_b_fn_outer 1       0.000400 ms
    matmul_fn_outer 2       0.000400 ms
    pack_b_fn_outer 2       0.001200 ms
    matmul_fn_outer 3       0.000600 ms
    pack_b_fn_outer 3       0.001700 ms
    matmul_fn_outer 4       0.000800 ms
    pack_b_fn_outer 4       0.002300 ms
    matmul_fn_outer 5       0.000900 ms
    pack_b_fn_outer 5       0.002700 ms
    matmul_fn_outer 6       0.001200 ms
    pack_b_fn_outer 6       0.003200 ms
    matmul_fn_outer 7       0.001500 ms
    pack_b_fn_outer 7       0.003700 ms
    matmul_fn_outer 8       0.001700 ms
    pack_b_fn_outer 8       0.004000 ms
    matmul_fn_outer 9       0.002000 ms
    pack_b_fn_outer 9       0.004500 ms
    matmul_fn_outer 10      0.002200 ms
    pack_b_fn_outer 10      0.004800 ms
    matmul_fn_outer 11      0.002400 ms
    pack_b_fn_outer 11      0.005300 ms
    matmul_fn_outer 12      0.002700 ms
    pack_b_fn_outer 12      0.006500 ms
    matmul_fn_outer 13      0.003100 ms
    pack_b_fn_outer 13      0.007400 ms
    matmul_fn_outer 14      0.003400 ms
    pack_b_fn_outer 14      0.007800 ms
    matmul_fn_outer 15      0.003700 ms
    pack_b_fn_outer 15      0.008300 ms
    matmul_fn_outer 16      0.004000 ms
    pack_b_fn_outer 16      0.008800 ms
    matmul_fn_outer 17      0.004400 ms
    pack_b_fn_outer 17      0.009199 ms
    matmul_fn_outer 18      0.004800 ms
    pack_b_fn_outer 18      0.009599 ms
    matmul_fn_outer 19      0.005100 ms
    pack_b_fn_outer 19      0.010099 ms
    matmul_fn_outer 20      0.005400 ms
    pack_b_fn_outer 20      0.010599 ms
    matmul_fn_outer 21      0.006000 ms
    pack_b_fn_outer 21      0.011299 ms
    matmul_fn_outer 22      0.006300 ms
    pack_b_fn_outer 22      0.011899 ms
    matmul_fn_outer 23      0.006500 ms
    pack_b_fn_outer 23      0.012299 ms
    matmul_fn_outer 24      0.006701 ms
    pack_b_fn_outer 24      0.012699 ms
    matmul_fn_outer 25      0.006901 ms
    pack_b_fn_outer 25      0.013099 ms
    matmul_fn_outer 26      0.007101 ms
    pack_b_fn_outer 26      0.013399 ms
    matmul_fn_outer 27      0.007300 ms
    pack_b_fn_outer 27      0.013799 ms
    matmul_fn_outer 28      0.007401 ms
    pack_b_fn_outer 28      0.014100 ms
    matmul_fn_outer 29      0.007601 ms
    pack_b_fn_outer 29      0.014600 ms
    matmul_fn_outer 30      0.007801 ms
    pack_b_fn_outer 30      0.015000 ms
    matmul_fn_outer 31      0.007901 ms
    pack_b_fn_outer 31      0.015399 ms
    matmul_fn_outer 32      0.008101 ms
    pack_b_fn_outer 32      0.015699 ms
    matmul_fn_outer 33      0.008301 ms
    pack_b_fn_outer 33      0.015999 ms
    matmul_fn_outer 34      0.008601 ms
    pack_b_fn_outer 34      0.016...
    ```

commit 3572c2b081198e1631f2df208c07490c6d4b4bf5
Author: Lisa Ong <[email protected]>
Date:   Fri Mar 10 10:57:39 2023 +0000

    Merged PR 3152: [nfc] [test] Skip fast_exp mlas tests on unsupported Aarch64

    These tests generate `llvm.x86.avx.max.ps.256`, which is not supported on non-Intel processors such as the Apple M1:

    ```
      %28 = load <8 x float>, <8 x float>* %27, align 4, !dbg !19
      %29 = call <8 x float> @llvm.x86.avx.max.ps.256(<8 x float> %28, <8 x float> <float 0xC0561814A0000000, float 0xC0561814A0000000, float 0xC0561814A0000000, float 0xC0561814A0000000, float 0xC0561814A0000000, float 0xC0561814A0000000, float 0xC0561814A0000000, float 0xC0561814A0000000>), !dbg !20
      %30 = call <8 x float> @llvm.fmuladd.v8f32(<8 x float> %29, <8 x float> <float 0x3FF7154760000000, float 0x3FF7154760000000, float 0x3FF7154760000000, float 0x3FF7154760000000, float 0x3FF7154760000000, float 0x3FF7154760000000, float 0x3FF7154760000000, float 0x3FF7154760000000>, <8 x float> <float 0x4168000000000000, float 0x4168000000000000, float 0x4168000000000000, float 0x4168000000000000, float 0x4168000000000000, float 0x4168000000000000, float 0x4168000000000000, float 0x4168000000000000>), !dbg !21
      %31 = fsub <8 x float> %30, <float 0x4168000000000000, float 0x4168000000000000, float 0x4168000000000000, float 0x4168000000000000, float 0x4168000000000000, float 0x4168000000000000, float 0x4168000000000000, float 0x4168000000000000>, !dbg !22

    ```
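
    The guard pattern, sketched with the standard library (the actual tests use Accera's `expectedFailure` helper, as the smoke_tests.py diff below shows):

    ```
    import platform
    import sys
    import unittest

    # True on Apple Silicon Macs, where the x86 AVX intrinsic cannot be lowered
    IS_MACOS_ARM64 = sys.platform == "darwin" and platform.machine() == "arm64"

    class FastExpTests(unittest.TestCase):
        @unittest.skipIf(IS_MACOS_ARM64, "avx2 instructions not supported on macOS arm64")
        def test_fast_exp_mlas(self):
            ...  # elided; see the real tests in smoke_tests.py
    ```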
Lisa Ong committed Mar 16, 2023
1 parent 7ac5fe4 commit f55e4d8
Showing 8 changed files with 261 additions and 93 deletions.
2 changes: 1 addition & 1 deletion accera/onnx-emitter/test/requirements.txt
@@ -1,4 +1,4 @@
-onnx==1.9.0
+onnx==1.13.0
onnxruntime==1.7.0
protobuf==3.20.2 # tied to the version of onnx above
sympy
1 change: 0 additions & 1 deletion accera/python/accera/test/dsl_tests.py
@@ -6413,7 +6413,6 @@ def _verify_func(
after=correctness_check_values["post"],
)

-@expectedFailure(FailedReason.NOT_IN_CORE, "Fail to lower to llvm")
def test_profiling_nested_function_calls(self):
test_name = "test_profiling_nested_function_calls"

95 changes: 95 additions & 0 deletions accera/python/accera/test/smoke_tests.py
@@ -664,9 +664,17 @@ def _():

v.check_correctness(function.name, before=(In_test, Out_test), after=(In_test, Out_ref))

@expectedFailure(
FailedReason.INVALID, "avx2 instructions not supported on MacOS arm64", sys.platform == "darwin"
and platform.machine() == "arm64"
)
def test_fast_exp_mlas_w_func_level_precision(self):
self._test_fast_exp_mlas(True)

@expectedFailure(
FailedReason.INVALID, "avx2 instructions not supported on MacOS arm64", sys.platform == "darwin"
and platform.machine() == "arm64"
)
def test_fast_exp_mlas_w_pkg_level_precision(self):
self._test_fast_exp_mlas(False)

@@ -1808,6 +1816,93 @@ def packed_index(i_outer, i_middle, i_inner, j_outer, j_middle, j_inner, tile_of
)] = test_input[i_outer + i_middle + i_inner, j_outer + j_middle + j_inner]
v.check_correctness(function.name, before=(test_input, test_output), after=(test_input, test_output_ref))

@expectedFailure(FailedReason.BUG, "_split_dimension of a dynamically sized dimension with a dynamic size is not working")
def test_dynamic_split_dim_dynamic_size(self) -> None:
from accera import create_dimensions
test_name = "test_dynamic_split_dim_dynamic_size"

M, N, MN = create_dimensions()

package = Package()

Input = Array(role=Role.INPUT, element_type=ScalarType.float32, shape=(MN,))
Output = Array(role=Role.INPUT_OUTPUT, element_type=ScalarType.float32, shape=(M, N))

nest = Nest(shape=(M, N))
i, j = nest.get_indices()

@nest.iteration_logic
def _():
split_input = Input._split_dimension(0, N)
Output[i, j] = split_input[i, j]

fn = package.add(
nest,
args=(MN, M, N, Input, Output),
base_name=f"{test_name}_fn"
)

output_dir = pathlib.Path(TEST_PACKAGE_DIR) / test_name
shutil.rmtree(output_dir, ignore_errors=True)

with verifiers.VerifyPackage(self, test_name, output_dir) as v:
package.build(
name=test_name, format=self.PACKAGE_FORMAT, mode=self.PACKAGE_MODE, output_dir=output_dir, _quiet=False
)

# correctness check
test_M = 64
test_N = 16
test_MN = test_M*test_N
test_input = np.random.random([test_M*test_N]).astype(np.float32)
test_output = np.random.random([test_M, test_N]).astype(np.float32)
test_output_ref = test_input.copy().reshape((test_M, test_N))
v.check_correctness(fn.name, before=(test_MN, test_M, test_N, test_input, test_output), after=(test_MN, test_M, test_N, test_input, test_output_ref))

@expectedFailure(FailedReason.BUG, "_split_dimension of a dynamically sized dimension with a static size is not working")
def test_dynamic_split_dim_static_size(self) -> None:
from accera import create_dimensions
test_name = "test_dynamic_split_dim_static_size"

M, MN = create_dimensions()
N = 16

package = Package()

Input = Array(role=Role.INPUT, element_type=ScalarType.float32, shape=(MN,))
Output = Array(role=Role.INPUT_OUTPUT, element_type=ScalarType.float32, shape=(M, N))

nest = Nest(shape=(M, N))
i, j = nest.get_indices()

@nest.iteration_logic
def _():
split_input = Input._split_dimension(0, cast(16, ScalarType.index))
Output[i, j] = split_input[i, j]

fn = package.add(
nest,
args=(MN, M, Input, Output),
base_name=f"{test_name}_fn"
)

output_dir = pathlib.Path(TEST_PACKAGE_DIR) / test_name
shutil.rmtree(output_dir, ignore_errors=True)

with verifiers.VerifyPackage(self, test_name, output_dir) as v:
package.build(
name=test_name, format=self.PACKAGE_FORMAT, mode=self.PACKAGE_MODE, output_dir=output_dir, _quiet=False
)

# correctness check
test_M = 64
test_N = N
test_MN = test_M*test_N
test_input = np.random.random([test_M*test_N]).astype(np.float32)
test_output = np.random.random([test_M, test_N]).astype(np.float32)
test_output_ref = test_input.copy().reshape((test_M, test_N))
v.check_correctness(fn.name, before=(test_MN, test_M, test_input, test_output), after=(test_MN, test_M, test_input, test_output_ref))

def test_padded_nchwc_conv2d_manual_cache(self) -> None:
input_channels = 64
base_input_shape = (input_channels, 28, 28) # CHW order
@@ -19,10 +19,15 @@ class OperationPass;
class RewritePatternSet;
} // namespace mlir

namespace
{
struct ProfileRegions;
}

namespace accera::transforms::value
{
void populateVectorizeValueOpPatterns(mlir::RewritePatternSet& patterns);
void populateValueToStandardPatterns(bool enableProfiling, mlir::RewritePatternSet& patterns);
void populateValueToStandardPatterns(bool enableProfiling, ProfileRegions& profileRegions, mlir::RewritePatternSet& patterns);
void populateValueLaunchFuncPatterns(mlir::RewritePatternSet& patterns);
void populateValueModuleRewritePatterns(mlir::RewritePatternSet& patterns);

