From bc12d5caaa0868cd12141a532a186432dbae243a Mon Sep 17 00:00:00 2001 From: Adrien Cassagne Date: Fri, 23 Jun 2017 19:20:15 +0200 Subject: [PATCH] Add a LICENSE and a README. --- LICENSE | 21 +++++ README.md | 223 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 244 insertions(+) create mode 100644 LICENSE create mode 100644 README.md diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..252c4d7 --- /dev/null +++ b/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2017 MIPP + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. \ No newline at end of file diff --git a/README.md b/README.md new file mode 100644 index 0000000..22548e4 --- /dev/null +++ b/README.md @@ -0,0 +1,223 @@ +# MyIntrinsics++ (MIPP) + +## Purpose + +MIPP is a portable and Open-source wrapper (MIT license) for vector intrinsic functions (SIMD) written in C++11. It works for SSE, AVX and ARM NEON instructions. 
+The MIPP wrapper supports single/double precision floating-point numbers and also signed integer arithmetic (32-bit, 16-bit and 8-bit). +With ARM NEON instructions the wrapper supports only single precision floating-point numbers. + +With the MIPP wrapper you do not need to write architecture-specific intrinsic code anymore. Just use the provided functions and the wrapper will automatically generate the right intrinsic calls for your specific architecture. + +## Short documentation + +### Install and configure your code for MIPP + +You don't have to install MIPP because it is a simple C++ header file. +Just include the header into your source files when the wrapper is needed. + + #include "MIPP/mipp.h" + +mipp.h uses a C++ `namespace`: `mipp`. If you do not want to prefix all the MIPP calls with `mipp::`, you can do this: + + #include "MIPP/mipp.h" + using namespace mipp; + +Before trying to compile, remember to tell the compiler what kind of vector instructions you want to use. Remember, MIPP currently supports SSE, AVX and NEON instructions. +For example, if you are using the GNU compiler (g++) you simply have to add the `-march=native` option for SSE- and AVX-compatible CPUs. +For ARM CPUs with NEON instructions you have to add the `-mfpu=neon` option (since most current NEON instructions are not IEEE-754 compatible). +MIPP also uses some nice features provided by C++11, so you have to add the `-std=c++11` flag to compile the code. You are now ready to run your code with the mipp.h wrapper. + +### Vector register declaration + +Just use the `mipp::Reg<T>` type. + + mipp::Reg<float> r1, r2, r3; // we have declared 3 vector registers + +But we do not know the number of elements per register here. This number of elements can be obtained by calling the `mipp::nElReg<T>()` function (`T` is a template parameter, it can be `double`, `float`, `int`, `short` or `signed char` type). + + for(int i = 0; i < n; i += mipp::nElReg<float>()) { + // ... 
+ } + +The register size directly depends on the precision of the data we are working on. + +### Register load and store instructions + +Firstly, register loads or stores need to be aligned on the register size. +To allocate aligned data you can use the MIPP predefined vector class: `mipp::vector<T>`. This class is fully compatible with the standard `std::vector<T>` class and it can be used everywhere you can use `std::vector<T>`. + + mipp::vector<float> myVector(n); + +Now, if the data are correctly allocated we can load a register from the vector: + + int n = mipp::nElReg<float>() * 10; + mipp::vector<float> myVector(n); + int i = 0; + mipp::Reg<float> r1 = &myVector[i*mipp::nElReg<float>()]; + +A store can be done with the `store(...)` method: + + int n = mipp::nElReg<float>() * 10; + mipp::vector<float> myVector(n); + int i = 0; + mipp::Reg<float> r1 = &myVector[i*mipp::nElReg<float>()]; + + // do something with r1 + + r1.store(&myVector[(i+1)*mipp::nElReg<float>()]); + +### Register initialization + +You can initialize a vector register from a scalar value: + + mipp::Reg<float> r1; // r1 = | unknown | unknown | unknown | unknown | + r1 = 1.0; // r1 = | +1.0 | +1.0 | +1.0 | +1.0 | + +Or from an initializer list (`std::initializer_list`): + + mipp::Reg<float> r1; // r1 = | unknown | unknown | unknown | unknown | + r1 = {1.0, 2.0, 3.0, 4.0}; // r1 = | +1.0 | +2.0 | +3.0 | +4.0 | + +### Computational instructions + +**Add** two vector registers: + + mipp::Reg<float> r1, r2, r3; + + r1 = 1.0; // r1 = | +1.0 | +1.0 | +1.0 | +1.0 | + r2 = 2.0; // r2 = | +2.0 | +2.0 | +2.0 | +2.0 | + + r3 = r1 + r2; // r3 = | +3.0 | +3.0 | +3.0 | +3.0 | + +**Subtract** two vector registers: + + mipp::Reg<float> r1, r2, r3; + + r1 = 1.0; // r1 = | +1.0 | +1.0 | +1.0 | +1.0 | + r2 = 2.0; // r2 = | +2.0 | +2.0 | +2.0 | +2.0 | + + r3 = r1 - r2; // r3 = | -1.0 | -1.0 | -1.0 | -1.0 | + +**Multiply** two vector registers: + + mipp::Reg<float> r1, r2, r3; + + r1 = 1.0; // r1 = | +1.0 | +1.0 | +1.0 | +1.0 | + r2 = 2.0; // r2 = | +2.0 | +2.0 | +2.0 | +2.0 | + + r3 = r1 * r2; // r3 = | +2.0 | 
+2.0 | +2.0 | +2.0 | + +**Divide** two vector registers: + + mipp::Reg<float> r1, r2, r3; + + r1 = 1.0; // r1 = | +1.0 | +1.0 | +1.0 | +1.0 | + r2 = 2.0; // r2 = | +2.0 | +2.0 | +2.0 | +2.0 | + + r3 = r1 / r2; // r3 = | +0.5 | +0.5 | +0.5 | +0.5 | + +**Fused multiply and add** of three vector registers: + + mipp::Reg<float> r1, r2, r3, r4; + + r1 = 2.0; // r1 = | +2.0 | +2.0 | +2.0 | +2.0 | + r2 = 3.0; // r2 = | +3.0 | +3.0 | +3.0 | +3.0 | + r3 = 1.0; // r3 = | +1.0 | +1.0 | +1.0 | +1.0 | + + // r4 = (r1 * r2) + r3 + r4 = mipp::fmadd(r1, r2, r3); // r4 = | +7.0 | +7.0 | +7.0 | +7.0 | + +**Fused negative multiply and add** of three vector registers: + + mipp::Reg<float> r1, r2, r3, r4; + + r1 = 2.0; // r1 = | +2.0 | +2.0 | +2.0 | +2.0 | + r2 = 3.0; // r2 = | +3.0 | +3.0 | +3.0 | +3.0 | + r3 = 1.0; // r3 = | +1.0 | +1.0 | +1.0 | +1.0 | + + // r4 = -((r1 * r2) + r3) + r4 = mipp::fnmadd(r1, r2, r3); // r4 = | -7.0 | -7.0 | -7.0 | -7.0 | + +**Square root** of a vector register: + + mipp::Reg<float> r1, r2; + + r1 = 9.0; // r1 = | +9.0 | +9.0 | +9.0 | +9.0 | + + r2 = mipp::sqrt(r1); // r2 = | +3.0 | +3.0 | +3.0 | +3.0 | + +**Reciprocal square root** of a vector register (be careful: this intrinsic exists only for single precision floating-point numbers): + + mipp::Reg<float> r1, r2; + + r1 = 9.0; // r1 = | +9.0 | +9.0 | +9.0 | +9.0 | + + r2 = mipp::rsqrt(r1); // r2 = | +0.3 | +0.3 | +0.3 | +0.3 | + +### Comparisons + +Find the **minimum** between two vector registers: + + mipp::Reg<float> r1, r2, r3; + + r1 = 2.0; // r1 = | +2.0 | +2.0 | +2.0 | +2.0 | + r2 = 3.0; // r2 = | +3.0 | +3.0 | +3.0 | +3.0 | + + r3 = mipp::min(r1, r2); // r3 = | +2.0 | +2.0 | +2.0 | +2.0 | + +Find the **maximum** between two vector registers: + + mipp::Reg<float> r1, r2, r3; + + r1 = 2.0; // r1 = | +2.0 | +2.0 | +2.0 | +2.0 | + r2 = 3.0; // r2 = | +3.0 | +3.0 | +3.0 | +3.0 | + + r3 = mipp::max(r1, r2); // r3 = | +3.0 | +3.0 | +3.0 | +3.0 | + +### Permutations + +The `rot(...)` method allows you to perform a **rotation** (a cyclic permutation) 
of the elements inside the register: + + mipp::Reg<float> r1, r2; + r1 = {3.0, 2.0, 1.0, 0.0}; // r1 = | +3.0 | +2.0 | +1.0 | +0.0 | + + r2 = mipp::rot(r1); // r2 = | +0.0 | +3.0 | +2.0 | +1.0 | + r1 = mipp::rot(r2); // r1 = | +1.0 | +0.0 | +3.0 | +2.0 | + r2 = mipp::rot(r1); // r2 = | +2.0 | +1.0 | +0.0 | +3.0 | + r1 = mipp::rot(r2); // r1 = | +3.0 | +2.0 | +1.0 | +0.0 | + +Of course there are many more available instructions in the MIPP wrapper and you can find these instructions in the MIPP files. + +## Code examples + +### Addition of two vectors + + + #include <cstdlib> // rand() + #include "MIPP/mipp.h" + + int main() + { + // data allocation + const int n = 32000; // size of the vectors vA, vB, vC + mipp::vector<float> vA(n); // in + mipp::vector<float> vB(n); // in + mipp::vector<float> vC(n); // out + + // data initialization + for (int i = 0; i < n; i++) vA[i] = rand() % 10; + for (int i = 0; i < n; i++) vB[i] = rand() % 10; + + // declare 3 vector registers + mipp::Reg<float> rA, rB, rC; + + // compute rC with the MIPP vectorized functions + for (int i = 0; i < n; i += mipp::nElReg<float>()) { + rA = &vA[i]; + rB = &vB[i]; + rC = rA + rB; + rC.store(&vC[i]); + } + + return 0; + } \ No newline at end of file