Implement 'toReg' homogeneous behavior between SIMD & sequential code.

aff3ct · Nov 10, 2024 · f727ba9 · f727ba9
1 parent 55523fa
commit f727ba9
Show file tree

Hide file tree

Showing 7 changed files with 46 additions and 7 deletions.
diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml
@@ -663,6 +663,7 @@ test-linux-armv8-unit-sve:
       artifacts: true
   tags:
     - arm64
+    - armie
     - linux
     - docker
   script:

diff --git a/README.md b/README.md
@@ -568,12 +568,12 @@ The `pipe` keyword stands for the "&#124;" binary operator.
 
 ### Conversions and Packing
 
-| **Short name** | **Prototype**                                        | **Documentation**                                                                                                                   | **Supported types**                                                                                                                                                    |
-| :---           | :---                                                 | :---                                                                                                                                | :---                                                                                                                                                                   |
-| `toReg`        | `Reg<T>  toReg (const Msk<N> m)`                     | Converts the mask `m` into a register of type `T`, the number of elements `N` has to be the same for the mask and the register.     | `double`, `float`, `int64_t`, `uint64_t`, `int32_t`, `uint32_t`, `int16_t`, `uint16_t`, `int8_t`, `uint8_t`                                                            |
-| `cvt`          | `Reg<T2> cvt   (const Reg<T1> r)`                    | Converts the elements of `r` into an other representation (the new representation and the original one have to have the same size). | `float -> int32_t`, `float -> uint32_t`, `int32_t -> float`, `uint32_t -> float`, `double -> int64_t`, `double -> uint64_t`, `int64_t -> double`, `uint64_t -> double` |
-| `cvt`          | `Reg<T2> cvt   (const Reg_2<T1> r)`                  | Converts elements of `r` into bigger elements (in bits).                                                                            | `int8_t -> int16_t`, `uint8_t -> uint16_t`, `int16_t -> int32_t`, `uint16_t -> uint32_t`, `int32_t -> int64_t`, `uint32_t -> uint64_t`                                 |
-| `pack`         | `Reg<T2> pack  (const Reg<T1> r1, const Reg<T1> r2)` | Packs elements of `r1` and `r2` into smaller elements (some information can be lost in the conversion).                             | `int32_t -> int16_t`, `uint32_t -> uint16_t`, `int16_t -> int8_t`, `uint16_t -> uint8_t`                                                                               |
+| **Short name** | **Prototype**                                        | **Documentation**                                                                                                                                                                                                                                                                                                                                   | **Supported types**                                                                                                                                                    |
+| :---           | :---                                                 | :---                                                                                                                                                                                                                                                                                                                                                | :---                                                                                                                                                                   |
+| `toReg`        | `Reg<T>  toReg (const Msk<N> m)`                     | Converts the mask `m` into a register of type `T`, the number of elements `N` has to be the same for the mask and the register. If the mask is `false` then all the bits of the corresponding element are set to 0, otherwise if the mask is `true` then all the bits are set to 1 (be careful, for float datatypes `true` is interpreted as NaN!). | `double`, `float`, `int64_t`, `uint64_t`, `int32_t`, `uint32_t`, `int16_t`, `uint16_t`, `int8_t`, `uint8_t`                                                            |
+| `cvt`          | `Reg<T2> cvt   (const Reg<T1> r)`                    | Converts the elements of `r` into another representation (the new representation and the original one have to have the same size).                                                                                                                                                                                                                  | `float -> int32_t`, `float -> uint32_t`, `int32_t -> float`, `uint32_t -> float`, `double -> int64_t`, `double -> uint64_t`, `int64_t -> double`, `uint64_t -> double` |
+| `cvt`          | `Reg<T2> cvt   (const Reg_2<T1> r)`                  | Converts elements of `r` into bigger elements (in bits).                                                                                                                                                                                                                                                                                            | `int8_t -> int16_t`, `uint8_t -> uint16_t`, `int16_t -> int32_t`, `uint16_t -> uint32_t`, `int32_t -> int64_t`, `uint32_t -> uint64_t`                                 |
+| `pack`         | `Reg<T2> pack  (const Reg<T1> r1, const Reg<T1> r2)` | Packs elements of `r1` and `r2` into smaller elements (some information can be lost in the conversion).                                                                                                                                                                                                                                             | `int32_t -> int16_t`, `uint32_t -> uint16_t`, `int16_t -> int8_t`, `uint16_t -> uint8_t`                                                                               |
 
 ### Arithmetic Operations
 

diff --git a/TODO.md b/TODO.md
@@ -1,5 +1,6 @@
 # TODO
 
+- [ ] Add left operator functions (ex: "5 + mipp::Reg" should work then!)
 - [ ] Fix and improve GitHub actions
 - [ ] Create a docker image with "Intel Software Development Emulator" to enable 
       AVX-512 instruction emulation on the runners that do not support

diff --git a/examples/mask.cpp b/examples/mask.cpp
@@ -65,6 +65,16 @@ int main(int argc, char** argv)
 	std::cout << "m_4  = " << m_4  << std::endl;
 	std::cout << std::endl;
 
+    std::cout << "r_1 = m_1.toReg<T>(): " << std::endl;
+    mipp::Reg<T> r_1 = m_1.toReg<T>();
+    std::cout << "r_1 = " << r_1 << std::endl;
+    std::cout << std::endl;
+
+    std::cout << "r_2 = m_2.toReg<T>(): " << std::endl;
+    mipp::Reg<T> r_2 = m_2.toReg<T>();
+    std::cout << "r_2 = " << r_2 << std::endl;
+    std::cout << std::endl;
+
 	std::cout << "Output vector (mout = m_3; mout.set0()): " << std::endl;
 	auto mout = m_3;
 	mout.set0();

diff --git a/include/mipp_object.hxx b/include/mipp_object.hxx
@@ -568,7 +568,7 @@ public:
 	template <typename T>
 	inline Reg<T> toReg() const
 	{
-		return this->m ? (T)1 : (T)0;
+		return this->m ? mipp_scop::All_one_bits<T>::make() : (T)0;
 	}
 #endif
 

diff --git a/include/mipp_scalar_op.h b/include/mipp_scalar_op.h
@@ -29,6 +29,11 @@ namespace mipp_scop // My Intrinsics Plus Plus SCalar OPerations
 
 	template <typename T>
 	inline T lshift(const T val, const int n);
+
+	/*
+	 * Factory for a value of type 'T', obtained through the static 'make()'
+	 * member. Only the declaration lives here; the definitions (generic and
+	 * per-type specializations) are provided by the implementation file
+	 * included at the end of this header.
+	 */
+	template <typename T>
+	struct All_one_bits
+	{
+		static inline T make();
+	};
 }
 
 #include "mipp_scalar_op.hxx"

diff --git a/include/mipp_scalar_op.hxx b/include/mipp_scalar_op.hxx
@@ -59,4 +59,26 @@ template <          > inline int64_t rshift(const int64_t val, const int n) { re
 template <          > inline int32_t rshift(const int32_t val, const int n) { return static_cast<int32_t>(static_cast<uint32_t>(val) >> n); }
 template <          > inline int16_t rshift(const int16_t val, const int n) { return static_cast<int16_t>(static_cast<uint16_t>(val) >> n); }
 template <          > inline int8_t  rshift(const int8_t  val, const int n) { return static_cast<int8_t >(static_cast<uint8_t >(val) >> n); }
+
+// Generic version: complement a zero of type 'T' so that every bit is set.
+// The complement is computed after integral promotion, hence the explicit
+// conversion back to 'T' on the way out.
+template <typename T>
+inline T All_one_bits<T>::make()
+{
+	const T zero = static_cast<T>(0);
+	return static_cast<T>(~zero);
+}
+
+// 'float' specialization: returns a float whose object representation has
+// every bit set to 1 (such a bit pattern is a NaN, not a regular number).
+//
+// The bytes are written through an 'unsigned char*', which is always allowed
+// to alias any object. Reading an integer through a 'float*' (the previous
+// implementation) violates the strict aliasing rule and is undefined behavior.
+template <>
+inline float All_one_bits<float>::make()
+{
+	float all_one_bits_f = 0.f;
+	unsigned char* bytes = reinterpret_cast<unsigned char*>(&all_one_bits_f);
+	for (unsigned i = 0; i < sizeof(all_one_bits_f); i++)
+		bytes[i] = 0xFF;
+	return all_one_bits_f;
+}
+
+// 'double' specialization: returns a double whose object representation has
+// every bit set to 1 (such a bit pattern is a NaN, not a regular number).
+//
+// The bytes are written through an 'unsigned char*', which is always allowed
+// to alias any object. Reading an integer through a 'double*' (the previous
+// implementation) violates the strict aliasing rule and is undefined behavior.
+template <>
+inline double All_one_bits<double>::make()
+{
+	double all_one_bits_d = 0.0;
+	unsigned char* bytes = reinterpret_cast<unsigned char*>(&all_one_bits_d);
+	for (unsigned i = 0; i < sizeof(all_one_bits_d); i++)
+		bytes[i] = 0xFF;
+	return all_one_bits_d;
+}
 }
-Original file line number
+Diff line change
@@ Expand Up / @@ -663,6 +663,7 @@ test-linux-armv8-unit-sve: @@
           artifacts: true
       tags:
         - arm64
+        - armie
         - linux
         - docker
       script:
@@ Expand Down @@