Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
auto merge of rust-lang#19294 : huonw/rust/transmute-inplace, r=nikomatsakis

This detects (a subset of) the cases when `transmute::<T, U>(x)` can be
lowered to a direct `bitcast T x to U` in LLVM.

This assists with efficiently handling a SIMD vector as multiple different
types, e.g. swapping bytes/words/double words around inside some larger
vector type.

C compilers like GCC and Clang handle integer vector types as `__m128i` for
all widths, and implicitly insert bitcasts as required. This patch allows
Rust to express this, even if it takes a bit of `unsafe`, whereas previously
it was impossible to do at all without inline assembly.

Example:

    pub fn reverse_u32s(u: u64x2) -> u64x2 {
        unsafe {
            let tmp = mem::transmute::<_, u32x4>(u);
            let swapped = u32x4(tmp.3, tmp.2, tmp.1, tmp.0);
            mem::transmute::<_, u64x2>(swapped)
        }
    }

Compiling with `--opt-level=3` gives:

Before

    define <2 x i64> @_ZN12reverse_u32s20hbdb206aba18a03d8tbaE(<2 x i64>) unnamed_addr #0 {
    entry-block:
      %1 = bitcast <2 x i64> %0 to i128
      %u.0.extract.trunc = trunc i128 %1 to i32
      %u.4.extract.shift = lshr i128 %1, 32
      %u.4.extract.trunc = trunc i128 %u.4.extract.shift to i32
      %u.8.extract.shift = lshr i128 %1, 64
      %u.8.extract.trunc = trunc i128 %u.8.extract.shift to i32
      %u.12.extract.shift = lshr i128 %1, 96
      %u.12.extract.trunc = trunc i128 %u.12.extract.shift to i32
      %2 = insertelement <4 x i32> undef, i32 %u.12.extract.trunc, i64 0
      %3 = insertelement <4 x i32> %2, i32 %u.8.extract.trunc, i64 1
      %4 = insertelement <4 x i32> %3, i32 %u.4.extract.trunc, i64 2
      %5 = insertelement <4 x i32> %4, i32 %u.0.extract.trunc, i64 3
      %6 = bitcast <4 x i32> %5 to <2 x i64>
      ret <2 x i64> %6
    }

    _ZN12reverse_u32s20hbdb206aba18a03d8tbaE:
        .cfi_startproc
        movd %xmm0, %rax
        punpckhqdq %xmm0, %xmm0
        movd %xmm0, %rcx
        movq %rcx, %rdx
        shrq $32, %rdx
        movq %rax, %rsi
        shrq $32, %rsi
        movd %eax, %xmm0
        movd %ecx, %xmm1
        punpckldq %xmm0, %xmm1
        movd %esi, %xmm2
        movd %edx, %xmm0
        punpckldq %xmm2, %xmm0
        punpckldq %xmm1, %xmm0
        retq

After

    define <2 x i64> @_ZN12reverse_u32s20hbdb206aba18a03d8tbaE(<2 x i64>) unnamed_addr #0 {
    entry-block:
      %1 = bitcast <2 x i64> %0 to <4 x i32>
      %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
      %3 = bitcast <4 x i32> %2 to <2 x i64>
      ret <2 x i64> %3
    }

    _ZN12reverse_u32s20hbdb206aba18a03d8tbaE:
        .cfi_startproc
        pshufd $27, %xmm0, %xmm0
        retq
- Loading branch information