diff --git a/.ci/env/apt.sh b/.ci/env/apt.sh index 4c370e8ee5e..2034d3d1cf5 100755 --- a/.ci/env/apt.sh +++ b/.ci/env/apt.sh @@ -37,7 +37,7 @@ function install_dpcpp { } function install_mkl { - sudo apt-get install intel-oneapi-mkl-devel + sudo apt-get install -y intel-oneapi-mkl-devel=2024.2.1-103 } function install_clang-format { diff --git a/.ci/pipeline/ci.yml b/.ci/pipeline/ci.yml index a0620f66e9d..76f4fa9bf7a 100755 --- a/.ci/pipeline/ci.yml +++ b/.ci/pipeline/ci.yml @@ -28,6 +28,9 @@ variables: TBB_VERSION : 'v2021.10.0' VM_IMAGE : 'ubuntu-22.04' SYSROOT_OS: 'jammy' + WIN_BASEKIT_VERSION: '2024.2.1.101' + WINDOWS_BASEKIT_URL: 'https://registrationcenter-download.intel.com/akdlm/IRC_NAS/d91caaa0-7306-46ea-a519-79a0423e1903/w_BaseKit_p_$(WIN_BASEKIT_VERSION)_offline.exe' + WINDOWS_DPCPP_COMPONENTS: 'intel.oneapi.win.cpp-dpcpp-common:intel.oneapi.win.mkl.devel:intel.oneapi.win.tbb.devel' jobs: - job: 'FormatterChecks' @@ -58,6 +61,9 @@ jobs: - script: | .ci/env/apt.sh dev-base displayName: 'apt-get and conda install' + - script: | + .ci/env/apt.sh mkl + displayName: 'mkl installation' - script: | .ci/scripts/describe_system.sh displayName: 'System info' @@ -415,6 +421,9 @@ jobs: - script: | .ci/env/apt.sh dpcpp displayName: 'dpcpp installation' + - script: | + .ci/env/apt.sh mkl + displayName: 'mkl installation' - script: | source /opt/intel/oneapi/compiler/latest/env/vars.sh .ci/scripts/describe_system.sh @@ -435,14 +444,17 @@ jobs: continueOnError: true - script: | source /opt/intel/oneapi/compiler/latest/env/vars.sh + source /opt/intel/oneapi/mkl/latest/env/vars.sh .ci/scripts/test.sh --test-kind examples --build-dir $(release.dir) --compiler clang --interface daal/cpp --build-system cmake displayName: 'daal/cpp examples' - script: | source /opt/intel/oneapi/compiler/latest/env/vars.sh + source /opt/intel/oneapi/mkl/latest/env/vars.sh .ci/scripts/test.sh --test-kind examples --build-dir $(release.dir) --compiler clang --interface oneapi/cpp --build-system 
cmake displayName: 'oneapi/cpp examples' - script: | source /opt/intel/oneapi/compiler/latest/env/vars.sh + source /opt/intel/oneapi/mkl/latest/env/vars.sh .ci/scripts/test.sh --test-kind samples --build-dir $(release.dir) --compiler gnu --interface daal/cpp/mpi --conda-env ci-env --build-system cmake displayName: 'daal/cpp/mpi samples' - task: PublishPipelineArtifact@1 @@ -520,12 +532,15 @@ jobs: --test_thread_mode=par displayName: 'cpp-examples-thread-release-static' - - script: | - export DALROOT=`pwd`/bazel-bin/release/daal/latest - bazel test //examples/oneapi/cpp:all \ - --test_link_mode=release_dynamic \ - --test_thread_mode=par - displayName: 'cpp-examples-thread-release-dynamic' + # The issue that bazel doesnt link MKL libs via -Wl, --start-group.. + # oneDAL make build pass this test + # TODO: add cycle linking in bazel + # - script: | + # export DALROOT=`pwd`/bazel-bin/release/daal/latest + # bazel test //examples/oneapi/cpp:all \ + # --test_link_mode=release_dynamic \ + # --test_thread_mode=par + # displayName: 'cpp-examples-thread-release-dynamic' - script: | bazel test //cpp/daal:tests @@ -645,57 +660,6 @@ jobs: displayName: 'Upload conformance tests artifacts' continueOnError: true -- job: 'macOSMakeClang' - timeoutInMinutes: 0 - variables: - release.dir: '__release_mac_clang' - platform.type : 'mac32e' - pool: - vmImage: 'macos-12' - steps: - - script: | - brew install dos2unix tree - conda create -n ci-env -q -y -c conda-forge python=3.10 - source /usr/local/miniconda/etc/profile.d/conda.sh - conda activate ci-env - pip install -q cpufeature - displayName: 'brew and conda install' - - script: | - source /usr/local/miniconda/etc/profile.d/conda.sh - conda activate ci-env - .ci/scripts/describe_system.sh - displayName: 'System info' - - script: | - .ci/scripts/build.sh --compiler clang --target daal --optimizations "sse2 avx2" --conda-env ci-env - displayName: 'make daal' - - script: | - .ci/scripts/build.sh --compiler clang --target onedal_c 
--optimizations "sse2 avx2" - displayName: 'make onedal_c' - - task: PublishPipelineArtifact@1 - inputs: - artifactName: '$(platform.type) build' - targetPath: '$(Build.Repository.LocalPath)/$(release.dir)' - displayName: 'Upload build artifacts' - continueOnError: true - - script: | - .ci/scripts/test.sh --test-kind examples --build-dir $(release.dir) --compiler clang --interface daal/cpp --build-system cmake - displayName: 'daal/cpp examples' - - script: | - .ci/scripts/test.sh --test-kind examples --build-dir $(release.dir) --compiler clang --interface oneapi/cpp --build-system cmake - displayName: 'oneapi/cpp examples' - - script: | - deploy/nuget/prepare_dal_nuget.sh --release-dir $(release.dir) --build-nupkg yes - tree -h -I include __nuget/inteldal*/ - ls -lh __nuget/inteldal*.nupkg - displayName: 'nuget pkg' - - task: PublishPipelineArtifact@1 - inputs: - artifactName: '$(platform.type) fail' - targetPath: '$(Build.Repository.LocalPath)/$(release.dir)' - displayName: 'Uploading on fail' - condition: failed() - continueOnError: true - - job: 'WindowsMakeVC' timeoutInMinutes: 0 variables: @@ -704,6 +668,8 @@ jobs: pool: vmImage: 'windows-2022' steps: + - script: .ci/scripts/install_windows.bat $(WINDOWS_BASEKIT_URL) $(WINDOWS_DPCPP_COMPONENTS) + displayName: Install oneAPI Base Toolkit - script: | set PATH=C:\msys64\usr\bin;%PATH% pip install cpufeature @@ -714,9 +680,13 @@ jobs: bash .ci/scripts/describe_system.sh displayName: 'System info' - script: | + call C:\temp\oneapi\setvars.bat --force + set MKL_FPK_GPU_VERSION_LINE=2024.0.0 .\.ci\scripts\build.bat daal vc avx2 displayName: 'make daal' - script: | + call C:\temp\oneapi\setvars.bat --force + set MKL_FPK_GPU_VERSION_LINE=2024.0.0 .\.ci\scripts\build.bat onedal_c vc avx2 displayName: 'make onedal_c' - task: PublishPipelineArtifact@1 @@ -726,10 +696,12 @@ jobs: displayName: 'Upload build artifacts' continueOnError: true - script: | + call C:\temp\oneapi\setvars.bat --force .\.ci\scripts\test.bat daal\cpp 
lib msvs cmake .\.ci\scripts\test.bat daal\cpp dll msvs cmake displayName: 'daal/cpp examples' - script: | + call C:\temp\oneapi\setvars.bat --force .\.ci\scripts\test.bat oneapi\cpp lib msvs cmake .\.ci\scripts\test.bat oneapi\cpp dll msvs cmake displayName: 'oneapi/cpp examples' diff --git a/.ci/scripts/build.bat b/.ci/scripts/build.bat index 76c78c620cd..2f460df60c4 100644 --- a/.ci/scripts/build.bat +++ b/.ci/scripts/build.bat @@ -28,14 +28,18 @@ set PATH=C:\msys64\usr\bin;%PATH% echo pacman -S --noconfirm msys/make msys/dos2unix pacman -S --noconfirm msys/make msys/dos2unix -echo call .ci\env\tbb.bat -if "%TBBROOT%"=="" if not exist .\__deps\tbb\win\tbb call .ci\env\tbb.bat || set errorcode=1 - -echo call .\dev\download_micromkl.bat -if "%MKLGPUFPKROOT%"=="" if not exist .\__deps\mklgpufpk\win call .\dev\download_micromkl.bat || set errorcode=1 - -echo call "C:\Program Files\Microsoft Visual Studio\2022\Enterprise\VC\Auxiliary\Build\vcvarsall" x64 -if "%VISUALSTUDIOVERSION%"=="" call "C:\Program Files\Microsoft Visual Studio\2022\Enterprise\VC\Auxiliary\Build\vcvarsall" x64 || set errorcode=1 +IF "%VS_VER%"=="2017_build_tools" ( + @call "C:\Program Files (x86)\Microsoft Visual Studio\2017\BuildTools\VC\Auxiliary\Build\vcvars64.bat" + echo "C:\Program Files (x86)\Microsoft Visual Studio\2017\BuildTools\VC\Auxiliary\Build\vcvars64.bat" +) ELSE ( + IF "%VS_VER%"=="2019_build_tools" ( + @call "C:\Program Files (x86)\Microsoft Visual Studio\2019\BuildTools\VC\Auxiliary\Build\vcvars64.bat" + echo "C:\Program Files (x86)\Microsoft Visual Studio\2019\BuildTools\VC\Auxiliary\Build\vcvars64.bat" + ) ELSE ( + @call "C:\temp\oneapi\setvars-vcvarsall.bat" %VS_VER% + echo "C:\temp\oneapi\setvars-vcvarsall.bat" %VS_VER% + ) +) echo make %1 -j%NUMBER_OF_PROCESSORS% COMPILER=%2 PLAT=win32e REQCPU=%3 make %1 -j%NUMBER_OF_PROCESSORS% COMPILER=%2 PLAT=win32e REQCPU=%3 || set errorcode=1 diff --git a/.ci/scripts/build.sh b/.ci/scripts/build.sh index 9a3548c1c6f..47bd0b81e57 100755 
--- a/.ci/scripts/build.sh +++ b/.ci/scripts/build.sh @@ -160,8 +160,7 @@ fi #main actions echo "Call env scripts" if [ "${backend_config}" == "mkl" ]; then - echo "Sourcing MKL env" - "${ONEDAL_DIR}"/dev/download_micromkl.sh with_gpu="${with_gpu}" + source /opt/intel/oneapi/mkl/latest/env/vars.sh elif [ "${backend_config}" == "ref" ] && [ ! -z "${BLAS_INSTALL_DIR}" ]; then export OPENBLASROOT="${BLAS_INSTALL_DIR}" elif [ "${backend_config}" == "ref" ]; then diff --git a/.ci/scripts/install_windows.bat b/.ci/scripts/install_windows.bat new file mode 100644 index 00000000000..0da86a94960 --- /dev/null +++ b/.ci/scripts/install_windows.bat @@ -0,0 +1,31 @@ +@echo off +rem ============================================================================ +rem Copyright contributors to the oneDAL project +rem +rem Licensed under the Apache License, Version 2.0 (the "License"); +rem you may not use this file except in compliance with the License. +rem You may obtain a copy of the License at +rem +rem http://www.apache.org/licenses/LICENSE-2.0 +rem +rem Unless required by applicable law or agreed to in writing, software +rem distributed under the License is distributed on an "AS IS" BASIS, +rem WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +rem See the License for the specific language governing permissions and +rem limitations under the License. +rem ============================================================================ + +set URL=%1 +set COMPONENTS=%2 + +curl.exe --output %TEMP%\webimage.exe --url %URL% --retry 5 --retry-delay 5 +start /b /wait %TEMP%\webimage.exe -s -x -f webimage_extracted --log extract.log +del %TEMP%\webimage.exe +if "%COMPONENTS%"=="" ( + webimage_extracted\bootstrapper.exe -s --action install --eula=accept --install-dir=C:\temp\oneapi\ -p=NEED_VS2017_INTEGRATION=0 -p=NEED_VS2019_INTEGRATION=0 -p=NEED_VS2022_INTEGRATION=0 --log-dir=. 
+) else ( + webimage_extracted\bootstrapper.exe -s --action install --components=%COMPONENTS% --eula=accept --install-dir=C:\temp\oneapi\ -p=NEED_VS2017_INTEGRATION=0 -p=NEED_VS2019_INTEGRATION=0 -p=NEED_VS2022_INTEGRATION=0 --log-dir=. +) +set installer_exit_code=%ERRORLEVEL% +rd /s/q "webimage_extracted" +exit /b %installer_exit_code% diff --git a/.ci/scripts/test.bat b/.ci/scripts/test.bat index aaced0689be..a2ef555a66f 100644 --- a/.ci/scripts/test.bat +++ b/.ci/scripts/test.bat @@ -34,8 +34,20 @@ echo CPUCOUNT=%CPUCOUNT% echo PATH=C:\msys64\usr\bin;%PATH% set PATH=C:\msys64\usr\bin;%PATH% -echo call "C:\Program Files\Microsoft Visual Studio\2022\Enterprise\VC\Auxiliary\Build\vcvarsall" x64 -call "C:\Program Files\Microsoft Visual Studio\2022\Enterprise\VC\Auxiliary\Build\vcvarsall" x64 || set errorcode=1 +echo "%VISUALSTUDIOVERSION% HERE" + +IF "%VS_VER%"=="2017_build_tools" ( + @call "C:\Program Files (x86)\Microsoft Visual Studio\2017\BuildTools\VC\Auxiliary\Build\vcvars64.bat" + echo "C:\Program Files (x86)\Microsoft Visual Studio\2017\BuildTools\VC\Auxiliary\Build\vcvars64.bat" +) ELSE ( + IF "%VS_VER%"=="2019_build_tools" ( + @call "C:\Program Files (x86)\Microsoft Visual Studio\2019\BuildTools\VC\Auxiliary\Build\vcvars64.bat" + echo "C:\Program Files (x86)\Microsoft Visual Studio\2019\BuildTools\VC\Auxiliary\Build\vcvars64.bat" + ) ELSE ( + @call "C:\temp\oneapi\setvars-vcvarsall.bat" %VS_VER% + echo "C:\temp\oneapi\setvars-vcvarsall.bat" %VS_VER% + ) +) echo call __release_win_vc\daal\latest\env\vars.bat call __release_win_vc\daal\latest\env\vars.bat || set errorcode=1 diff --git a/.github/workflows/nightly-build.yml b/.github/workflows/nightly-build.yml index dc9e6a06057..6f802d3b405 100644 --- a/.github/workflows/nightly-build.yml +++ b/.github/workflows/nightly-build.yml @@ -24,6 +24,10 @@ on: permissions: contents: read +env: + WINDOWS_BASEKIT_URL: 
'https://registrationcenter-download.intel.com/akdlm/IRC_NAS/d91caaa0-7306-46ea-a519-79a0423e1903/w_BaseKit_p_2024.2.1.101_offline.exe' + WINDOWS_ALL_COMPONENTS: 'intel.oneapi.win.cpp-dpcpp-common:intel.oneapi.win.mkl.devel:intel.oneapi.win.tbb.devel' + jobs: build_lnx: name: oneDAL Linux nightly build @@ -38,6 +42,8 @@ jobs: repository: oneapi-src/oneDAL - name: Install DPC++ run: .ci/env/apt.sh dpcpp + - name: Install MKL + run: .ci/env/apt.sh mkl - name: System Info run: | source /opt/intel/oneapi/compiler/latest/env/vars.sh @@ -68,32 +74,34 @@ jobs: with: repository: oneapi-src/oneDAL - name: Install DPC++ + shell: cmd run: | - & .ci/scripts/install_dpc.ps1 - - name: Prepare Intel OpenCL CPU runtime - run: | - # Store the unpacked runtime to centralize and reduce external downloads - & .ci/scripts/collect_opencl_rt.ps1 + call .\.ci\scripts\install_windows.bat ${{ env.WINDOWS_BASEKIT_URL }} ${{ env.WINDOWS_ALL_COMPONENTS }} - name: System Info shell: cmd run: | set PATH=C:\msys64\usr\bin;%PATH% pip install cpufeature - call .\dpcpp\compiler\latest\env\vars.bat + call C:\temp\oneapi\setvars.bat bash .ci/scripts/describe_system.sh - name: Make daal shell: cmd run: | + call C:\temp\oneapi\setvars.bat + set MKL_FPK_GPU_VERSION_LINE=2024.0.0 call .\.ci\scripts\build.bat daal vc avx2 - name: Make onedal shell: cmd run: | + call C:\temp\oneapi\setvars.bat + set MKL_FPK_GPU_VERSION_LINE=2024.0.0 call .\.ci\scripts\build.bat onedal_c vc avx2 - name: Make oneapi_dpc shell: cmd run: | - call .\dpcpp\compiler\latest\env\vars.bat - call .\dpcpp\compiler\latest\bin\sycl-ls.exe + call C:\temp\oneapi\setvars.bat + set MKL_FPK_GPU_VERSION_LINE=2024.0.0 + call C:\temp\oneapi\compiler\latest\bin\sycl-ls.exe call .\.ci\scripts\build.bat onedal_dpc vc avx2 - name: Archive build uses: actions/upload-artifact@v4 @@ -103,14 +111,9 @@ jobs: - name: Compress DPC++ shell: cmd run: | - tar -cvzf icx.zip .\dpcpp + tar -cvzf icx.zip C:\temp\oneapi - name: Archive DPC++ uses: 
actions/upload-artifact@v4 with: name: icx_compiler path: .\icx.zip - - name: Archive Intel OpenCL CPU runtime - uses: actions/upload-artifact@v4 - with: - name: opencl_rt_installer - path: .\opencl_rt.msi diff --git a/WORKSPACE b/WORKSPACE index 3cba5bcd224..38d50397eca 100644 --- a/WORKSPACE +++ b/WORKSPACE @@ -22,21 +22,6 @@ opencl_repo( name = "opencl", ) -load("@onedal//dev/bazel/deps:micromkl.bzl", "micromkl_repo", "micromkl_dpc_repo") -micromkl_repo( - name = "micromkl", - root_env_var = "MKLFPKROOT", - url = "https://github.com/oneapi-src/oneDAL/releases/download/Dependencies/mklfpk_lnx_20230413.tgz", - sha256 = "e99dd6fb18f1fda382c53373262d1bb44c1b58aa6edff94cfb0e9d8dcd3395ed", -) - -micromkl_dpc_repo( - name = "micromkl_dpc", - root_env_var = "MKLGPUFPKROOT", - url = "https://github.com/oneapi-src/oneDAL/releases/download/Dependencies/mklgpufpk_lnx_20240605.tgz", - sha256 = "0787a92e9580ed6b9fb97d054a0ed77994dbc18b4b3fb099451cb1e6ebdf4f16", -) - load("@onedal//dev/bazel/deps:openblas.bzl", "openblas_repo") openblas_repo( name = "openblas", @@ -87,16 +72,23 @@ mkl_repo( name = "mkl", root_env_var = "MKLROOT", urls = [ + # TODO: when the issue with binutils will be solved, replace 2023.0 to 2024.2 "https://files.pythonhosted.org/packages/76/8c/2e6fb6186fa9335a0feb7845e001e18c22627a06ae68650e5a84ca2b536d/mkl_static-2023.0.0-py2.py3-none-manylinux1_x86_64.whl", - "https://files.pythonhosted.org/packages/cf/d1/ea2d769006337d968a89337dd1c3eb09c528f9ac629e8ab99324e1122f03/mkl_include-2023.0.0-py2.py3-none-manylinux1_x86_64.whl", + #"https://files.pythonhosted.org/packages/c1/44/42ea3ad7bbaa65acb54c977961118d7b24ea687e7c3d64aba0a019cbfa19/mkl_static-2024.2.0-py2.py3-none-manylinux1_x86_64.whl", + "https://files.pythonhosted.org/packages/80/e4/93ddfd475420f1c24d96f3bba1f87ec31a1eea847884c4ccb243cb336a61/mkl_include-2024.2.0-py2.py3-none-manylinux1_x86_64.whl", + 
"https://files.pythonhosted.org/packages/c9/3a/8797ef320a04e0b939a07365f09ce11f5484150bd3600c6400391c5c36e9/mkl_devel_dpcpp-2024.2.0-py2.py3-none-manylinux1_x86_64.whl", ], sha256s = [ "49d16f315f6803b1046a4796686af766ad487f9f6d98ea76b6cdb2ebd5b559f9", - "14b0958dff799378975d83fbd00ce756645aa36b9f924bdfdb0fb031f72b734d", + #"8c2a6c6a144c5619f1df75fd550b32730f3e0632b55a15a42a95516e142ccf47", + "63ed16ece64d9420e9fe1d5e1b55e0680632b61ad1c0e5f207b17f85233fcc09", + "b80099209aef1b147b8f1c1621a47078fba2c17b2faee131939ea4d32da2c35c", ], strip_prefixes = [ "mkl_static-2023.0.0.data/data", - "mkl_include-2023.0.0.data/data", + #"mkl_static-2024.2.0.data/data", + "mkl_include-2024.2.0.data/data", + "mkl_devel_dpcpp-2024.2.0.data/data", ], ) diff --git a/cpp/daal/BUILD b/cpp/daal/BUILD index 0cbf8a50316..4557c88ffac 100644 --- a/cpp/daal/BUILD +++ b/cpp/daal/BUILD @@ -18,10 +18,7 @@ daal_module( deps = select({ "@config//:backend_ref": [ ], "//conditions:default": [ - "@micromkl//:vml_ipp", - # TODO: Currently vml_ipp lib depends on TBB, but it shouldn't - # Remove TBB from deps once problem with vml_ipp is resolved - "@tbb//:tbb_binary", + "@mkl//:mkl_thr", ], }), ) @@ -32,7 +29,7 @@ daal_module( deps = select({ "@config//:backend_ref": [ "@openblas//:openblas", ], - "//conditions:default": [ "@micromkl//:mkl_thr", + "//conditions:default": [ "@mkl//:mkl_thr", ], }), ) @@ -64,8 +61,7 @@ daal_module( ], "//conditions:default": [ ":public_includes", - "@micromkl//:headers", - "@micromkl_dpc//:headers", + "@mkl//:headers", ], }), ) diff --git a/cpp/daal/src/algorithms/export_win32e.def b/cpp/daal/src/algorithms/export_win32e.def index 443714aef69..7962b0a8844 100644 --- a/cpp/daal/src/algorithms/export_win32e.def +++ b/cpp/daal/src/algorithms/export_win32e.def @@ -15,19 +15,3 @@ ;=============================================================================== EXPORTS -fpk_serv_malloc -fpk_serv_free -fpk_serv_memcpy_s -fpk_serv_lock -fpk_serv_unlock -fpk_serv_strnlen_s 
-fpk_serv_strncpy_s -fpk_serv_strncat_s -fpk_serv_thread_yield -fpk_serv_core_register_cleanup -fpk_serv_calloc -fpk_serv_printf_s -fpk_serv_memmove_s -fpk_serv_realloc -fpk_serv_print -fpk_serv_exit diff --git a/cpp/daal/src/algorithms/kmeans/kmeans_init_impl.i b/cpp/daal/src/algorithms/kmeans/kmeans_init_impl.i old mode 100755 new mode 100644 diff --git a/cpp/daal/src/algorithms/multiclassclassifier/multiclassclassifier_predict_kernel.h b/cpp/daal/src/algorithms/multiclassclassifier/multiclassclassifier_predict_kernel.h index 2b9225646ee..065f3a3a1c0 100644 --- a/cpp/daal/src/algorithms/multiclassclassifier/multiclassclassifier_predict_kernel.h +++ b/cpp/daal/src/algorithms/multiclassclassifier/multiclassclassifier_predict_kernel.h @@ -22,8 +22,8 @@ //-- */ -#ifndef __MULTICLASSCLASSIFIER_PREDICT_FPK_H__ -#define __MULTICLASSCLASSIFIER_PREDICT_FPK_H__ +#ifndef __MULTICLASSCLASSIFIER_PREDICT_KERNEL_H__ +#define __MULTICLASSCLASSIFIER_PREDICT_KERNEL_H__ #include "data_management/data/numeric_table.h" #include "algorithms/model.h" diff --git a/cpp/daal/src/algorithms/naivebayes/naivebayes_predict_kernel.h b/cpp/daal/src/algorithms/naivebayes/naivebayes_predict_kernel.h index 3924f9c67d1..9e88cd86f17 100644 --- a/cpp/daal/src/algorithms/naivebayes/naivebayes_predict_kernel.h +++ b/cpp/daal/src/algorithms/naivebayes/naivebayes_predict_kernel.h @@ -21,8 +21,8 @@ //-- */ -#ifndef _NAIVEBAYES_ASSIGN_FPK_H -#define _NAIVEBAYES_ASSIGN_FPK_H +#ifndef __NAIVEBAYES_PREDICT_KERNEL_H__ +#define __NAIVEBAYES_PREDICT_KERNEL_H__ #include "algorithms/naive_bayes/multinomial_naive_bayes_model.h" #include "algorithms/naive_bayes/multinomial_naive_bayes_predict_types.h" diff --git a/cpp/daal/src/algorithms/naivebayes/naivebayes_train_kernel.h b/cpp/daal/src/algorithms/naivebayes/naivebayes_train_kernel.h index 01e1ce238ba..6a4a40d3047 100644 --- a/cpp/daal/src/algorithms/naivebayes/naivebayes_train_kernel.h +++ b/cpp/daal/src/algorithms/naivebayes/naivebayes_train_kernel.h @@ -21,8 
+21,8 @@ //-- */ -#ifndef __NAIVEBAYES_TRAIN_FPK_H__ -#define __NAIVEBAYES_TRAIN_FPK_H__ +#ifndef __NAIVEBAYES_TRAIN_KERNEL_H__ +#define __NAIVEBAYES_TRAIN_KERNEL_H__ #include "algorithms/naive_bayes/multinomial_naive_bayes_model.h" #include "algorithms/naive_bayes/multinomial_naive_bayes_training_types.h" diff --git a/cpp/daal/src/algorithms/qr/qr_dense_default_kernel.h b/cpp/daal/src/algorithms/qr/qr_dense_default_kernel.h index e667b3e81ab..b831dc8b1d9 100644 --- a/cpp/daal/src/algorithms/qr/qr_dense_default_kernel.h +++ b/cpp/daal/src/algorithms/qr/qr_dense_default_kernel.h @@ -21,8 +21,8 @@ //-- */ -#ifndef __QR_FPK_H__ -#define __QR_FPK_H__ +#ifndef __QR_DENSE_DEFAULT_KERNEL_H__ +#define __QR_DENSE_DEFAULT_KERNEL_H__ #include "algorithms/qr/qr_batch.h" #include "src/algorithms/kernel.h" diff --git a/cpp/daal/src/algorithms/svd/svd_dense_default_kernel.h b/cpp/daal/src/algorithms/svd/svd_dense_default_kernel.h index f0a19e8eb03..c8f8f333467 100644 --- a/cpp/daal/src/algorithms/svd/svd_dense_default_kernel.h +++ b/cpp/daal/src/algorithms/svd/svd_dense_default_kernel.h @@ -21,8 +21,8 @@ //-- */ -#ifndef __SVD_FPK_H__ -#define __SVD_FPK_H__ +#ifndef __SVD_DENSE_DEFAULT_KERNEL_H__ +#define __SVD_DENSE_DEFAULT_KERNEL_H__ #include "algorithms/svd/svd_batch.h" #include "src/algorithms/kernel.h" diff --git a/cpp/daal/src/externals/config_mkl.h b/cpp/daal/src/externals/config_mkl.h index 8952ca2c40b..3c6465886e2 100644 --- a/cpp/daal/src/externals/config_mkl.h +++ b/cpp/daal/src/externals/config_mkl.h @@ -27,6 +27,7 @@ #include "services/daal_defines.h" #include "services/env_detect.h" +#include "src/externals/service_thread_declar_mkl.h" #include "src/externals/service_blas_mkl.h" #include "src/externals/service_lapack_mkl.h" #include "src/externals/service_math_mkl.h" diff --git a/cpp/daal/src/externals/core_threading_win_dll.cpp b/cpp/daal/src/externals/core_threading_win_dll.cpp index 7a25a0eddd1..f24689a341e 100644 --- 
a/cpp/daal/src/externals/core_threading_win_dll.cpp +++ b/cpp/daal/src/externals/core_threading_win_dll.cpp @@ -788,396 +788,3 @@ DAAL_EXPORT void * _getThreadPinner(bool create_pinner, void (*read_topo)(int &, return _getThreadPinner_ptr(create_pinner, read_topo, deleter); } #endif - -#define CALL_VOID_FUNC_FROM_DLL(fn_dpref, fn_name, argdecl, argcall) \ - typedef void(*fn_dpref##fn_name##_t) argdecl; \ - static fn_dpref##fn_name##_t fn_dpref##fn_name##_ptr = NULL; \ - CALL_VOID_FUNC_FROM_DLL_CPU(fn_dpref, avx512_, fn_name, argdecl, argcall) \ - CALL_VOID_FUNC_FROM_DLL_CPU(fn_dpref, avx2_, fn_name, argdecl, argcall) \ - CALL_VOID_FUNC_FROM_DLL_CPU(fn_dpref, sse42_, fn_name, argdecl, argcall) \ - CALL_VOID_FUNC_FROM_DLL_CPU(fn_dpref, sse2_, fn_name, argdecl, argcall) - -#define CALL_VOID_FUNC_FROM_DLL_CPU(fn_dpref, fn_cpu, fn_name, argdecl, argcall) \ - extern "C" DAAL_EXPORT void fn_dpref##fn_cpu##fn_name argdecl \ - { \ - load_daal_thr_dll(); \ - if (fn_dpref##fn_name##_ptr == NULL) \ - { \ - fn_dpref##fn_name##_ptr = (fn_dpref##fn_name##_t)load_daal_thr_func(#fn_dpref #fn_cpu #fn_name); \ - } \ - fn_dpref##fn_name##_ptr argcall; \ - } - -#if defined(_WIN64) - #define CALL_VOID_FUNC_FROM_DLL_CPU_MIC(fn_dpref, fn_cpu, fn_name, argdecl, argcall) \ - extern "C" DAAL_EXPORT void fn_dpref##fn_cpu##fn_name argdecl \ - { \ - load_daal_thr_dll(); \ - if (fn_dpref##fn_name##_ptr == NULL) \ - { \ - fn_dpref##fn_name##_ptr = (fn_dpref##fn_name##_t)load_daal_thr_func(#fn_dpref #fn_cpu #fn_name); \ - } \ - fn_dpref##fn_name##_ptr argcall; \ - } -#else - #define CALL_VOID_FUNC_FROM_DLL_CPU_MIC(fn_dpref, fn_cpu, fn_name, argdecl, argcall) -#endif - -#define CALL_RET_FUNC_FROM_DLL(ret_type, fn_dpref, fn_name, argdecl, argcall) \ - typedef ret_type(*fn_dpref##fn_name##_t) argdecl; \ - static fn_dpref##fn_name##_t fn_dpref##fn_name##_ptr = NULL; \ - CALL_RET_FUNC_FROM_DLL_CPU(ret_type, fn_dpref, avx512_, fn_name, argdecl, argcall) \ - CALL_RET_FUNC_FROM_DLL_CPU(ret_type, fn_dpref, 
avx2_, fn_name, argdecl, argcall) \ - CALL_RET_FUNC_FROM_DLL_CPU(ret_type, fn_dpref, sse42_, fn_name, argdecl, argcall) \ - CALL_RET_FUNC_FROM_DLL_CPU(ret_type, fn_dpref, sse2_, fn_name, argdecl, argcall) - -#define CALL_RET_FUNC_FROM_DLL_CPU(ret_type, fn_dpref, fn_cpu, fn_name, argdecl, argcall) \ - extern "C" DAAL_EXPORT ret_type fn_dpref##fn_cpu##fn_name argdecl \ - { \ - load_daal_thr_dll(); \ - if (fn_dpref##fn_name##_ptr == NULL) \ - { \ - fn_dpref##fn_name##_ptr = (fn_dpref##fn_name##_t)load_daal_thr_func(#fn_dpref #fn_cpu #fn_name); \ - } \ - return fn_dpref##fn_name##_ptr argcall; \ - } - -#if defined(_WIN64) - #define CALL_RET_FUNC_FROM_DLL_CPU_MIC(ret_type, fn_dpref, fn_cpu, fn_name, argdecl, argcall) \ - extern "C" DAAL_EXPORT ret_type fn_dpref##fn_cpu##fn_name argdecl \ - { \ - load_daal_thr_dll(); \ - if (fn_dpref##fn_name##_ptr == NULL) \ - { \ - fn_dpref##fn_name##_ptr = (fn_dpref##fn_name##_t)load_daal_thr_func(#fn_dpref #fn_cpu #fn_name); \ - } \ - return fn_dpref##fn_name##_ptr argcall; \ - } -#else - #define CALL_RET_FUNC_FROM_DLL_CPU_MIC(ret_type, fn_dpref, fn_cpu, fn_name, argdecl, argcall) -#endif - -/* Used directly in Intel(R) oneAPI Data Analytics Library (oneDAL) */ -CALL_VOID_FUNC_FROM_DLL(fpk_blas_, dsyrk, - (const char * uplo, const char * trans, const DAAL_INT * n, const DAAL_INT * k, const double * alpha, const double * a, - const DAAL_INT * lda, const double * beta, double * c, const DAAL_INT * ldc), - (uplo, trans, n, k, alpha, a, lda, beta, c, ldc)); - -CALL_VOID_FUNC_FROM_DLL(fpk_blas_, ssyrk, - (const char * uplo, const char * trans, const DAAL_INT * n, const DAAL_INT * k, const float * alpha, const float * a, - const DAAL_INT * lda, const float * beta, float * c, const DAAL_INT * ldc), - (uplo, trans, n, k, alpha, a, lda, beta, c, ldc)); - -CALL_VOID_FUNC_FROM_DLL(fpk_blas_, dsyr, - (const char * uplo, const DAAL_INT * n, const double * alpha, const double * x, const DAAL_INT * incx, double * a, - const DAAL_INT * lda), - 
(uplo, n, alpha, x, incx, a, lda)); -CALL_VOID_FUNC_FROM_DLL(fpk_blas_, ssyr, - (const char * uplo, const DAAL_INT * n, const float * alpha, const float * x, const DAAL_INT * incx, float * a, - const DAAL_INT * lda), - (uplo, n, alpha, x, incx, a, lda)); - -CALL_VOID_FUNC_FROM_DLL(fpk_blas_, dgemm, - (const char * transa, const char * transb, const DAAL_INT * m, const DAAL_INT * n, const DAAL_INT * k, const double * alpha, - const double * a, const DAAL_INT * lda, const double * b, const DAAL_INT * ldb, const double * beta, double * c, - const DAAL_INT * ldc), - (transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc)); -CALL_VOID_FUNC_FROM_DLL(fpk_blas_, sgemm, - (const char * transa, const char * transb, const DAAL_INT * m, const DAAL_INT * n, const DAAL_INT * k, const float * alpha, - const float * a, const DAAL_INT * lda, const float * b, const DAAL_INT * ldb, const float * beta, float * c, - const DAAL_INT * ldc), - (transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc)); - -CALL_VOID_FUNC_FROM_DLL(fpk_blas_, xdgemm, - (const char * transa, const char * transb, const DAAL_INT * m, const DAAL_INT * n, const DAAL_INT * k, const double * alpha, - const double * a, const DAAL_INT * lda, const double * b, const DAAL_INT * ldb, const double * beta, double * c, - const DAAL_INT * ldc), - (transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc)); -CALL_VOID_FUNC_FROM_DLL(fpk_blas_, xsgemm, - (const char * transa, const char * transb, const DAAL_INT * m, const DAAL_INT * n, const DAAL_INT * k, const float * alpha, - const float * a, const DAAL_INT * lda, const float * b, const DAAL_INT * ldb, const float * beta, float * c, - const DAAL_INT * ldc), - (transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc)); - -CALL_VOID_FUNC_FROM_DLL(fpk_blas_, dsymm, - (const char * side, const char * uplo, const DAAL_INT * m, const DAAL_INT * n, const double * alpha, const double * a, - const DAAL_INT * lda, const double * b, const DAAL_INT * ldb, const 
double * beta, double * c, const DAAL_INT * ldc), - (side, uplo, m, n, alpha, a, lda, b, ldb, beta, c, ldc)); -CALL_VOID_FUNC_FROM_DLL(fpk_blas_, ssymm, - (const char * side, const char * uplo, const DAAL_INT * m, const DAAL_INT * n, const float * alpha, const float * a, - const DAAL_INT * lda, const float * b, const DAAL_INT * ldb, const float * beta, float * c, const DAAL_INT * ldc), - (side, uplo, m, n, alpha, a, lda, b, ldb, beta, c, ldc)); - -CALL_VOID_FUNC_FROM_DLL(fpk_blas_, dgemv, - (const char * trans, const DAAL_INT * m, const DAAL_INT * n, const double * alpha, const double * a, const DAAL_INT * lda, - const double * x, const DAAL_INT * incx, const double * beta, double * y, const DAAL_INT * incy), - (trans, m, n, alpha, a, lda, x, incx, beta, y, incy)); -CALL_VOID_FUNC_FROM_DLL(fpk_blas_, sgemv, - (const char * trans, const DAAL_INT * m, const DAAL_INT * n, const float * alpha, const float * a, const DAAL_INT * lda, - const float * x, const DAAL_INT * incx, const float * beta, float * y, const DAAL_INT * incy), - (trans, m, n, alpha, a, lda, x, incx, beta, y, incy)); - -CALL_VOID_FUNC_FROM_DLL(fpk_blas_, daxpy, - (const DAAL_INT * n, const double * alpha, const double * x, const DAAL_INT * incx, double * y, const DAAL_INT * incy), - (n, alpha, x, incx, y, incy)); -CALL_VOID_FUNC_FROM_DLL(fpk_blas_, saxpy, - (const DAAL_INT * n, const float * alpha, const float * x, const DAAL_INT * incx, float * y, const DAAL_INT * incy), - (n, alpha, x, incx, y, incy)); - -CALL_VOID_FUNC_FROM_DLL(fpk_blas_, xdsyr, - (const char * uplo, const DAAL_INT * n, const double * alpha, const double * x, const DAAL_INT * incx, double * a, - const DAAL_INT * lda), - (uplo, n, alpha, x, incx, a, lda)); -CALL_VOID_FUNC_FROM_DLL(fpk_blas_, xssyr, - (const char * uplo, const DAAL_INT * n, const float * alpha, const float * x, const DAAL_INT * incx, float * a, - const DAAL_INT * lda), - (uplo, n, alpha, x, incx, a, lda)); - -CALL_VOID_FUNC_FROM_DLL(fpk_blas_, xdsyrk, - (const char * 
uplo, const char * trans, const DAAL_INT * n, const DAAL_INT * k, const double * alpha, const double * a, - const DAAL_INT * lda, const double * beta, double * c, const DAAL_INT * ldc), - (uplo, trans, n, k, alpha, a, lda, beta, c, ldc)); -CALL_VOID_FUNC_FROM_DLL(fpk_blas_, xssyrk, - (const char * uplo, const char * trans, const DAAL_INT * n, const DAAL_INT * k, const float * alpha, const float * a, - const DAAL_INT * lda, const float * beta, float * c, const DAAL_INT * ldc), - (uplo, trans, n, k, alpha, a, lda, beta, c, ldc)); - -CALL_VOID_FUNC_FROM_DLL(fpk_blas_, xdsymm, - (const char * side, const char * uplo, const DAAL_INT * m, const DAAL_INT * n, const double * alpha, const double * a, - const DAAL_INT * lda, const double * b, const DAAL_INT * ldb, const double * beta, double * c, const DAAL_INT * ldc), - (side, uplo, m, n, alpha, a, lda, b, ldb, beta, c, ldc)); -CALL_VOID_FUNC_FROM_DLL(fpk_blas_, xssymm, - (const char * side, const char * uplo, const DAAL_INT * m, const DAAL_INT * n, const float * alpha, const float * a, - const DAAL_INT * lda, const float * b, const DAAL_INT * ldb, const float * beta, float * c, const DAAL_INT * ldc), - (side, uplo, m, n, alpha, a, lda, b, ldb, beta, c, ldc)); - -CALL_VOID_FUNC_FROM_DLL(fpk_spblas_, mkl_dcsrmultd, - (const char * transa, const DAAL_INT * m, const DAAL_INT * n, const DAAL_INT * k, double * a, DAAL_INT * ja, DAAL_INT * ia, - double * b, DAAL_INT * jb, DAAL_INT * ib, double * c, DAAL_INT * ldc), - (transa, m, n, k, a, ja, ia, b, jb, ib, c, ldc)); -CALL_VOID_FUNC_FROM_DLL(fpk_spblas_, mkl_scsrmultd, - (const char * transa, const DAAL_INT * m, const DAAL_INT * n, const DAAL_INT * k, float * a, DAAL_INT * ja, DAAL_INT * ia, - float * b, DAAL_INT * jb, DAAL_INT * ib, float * c, DAAL_INT * ldc), - (transa, m, n, k, a, ja, ia, b, jb, ib, c, ldc)); - -CALL_VOID_FUNC_FROM_DLL(fpk_spblas_, mkl_dcsrmv, - (const char * transa, const DAAL_INT * m, const DAAL_INT * k, const double * alpha, const char * matdescra, - const 
double * val, const DAAL_INT * indx, const DAAL_INT * pntrb, const DAAL_INT * pntre, const double * x, - const double * beta, double * y), - (transa, m, k, alpha, matdescra, val, indx, pntrb, pntre, x, beta, y)); -CALL_VOID_FUNC_FROM_DLL(fpk_spblas_, mkl_scsrmv, - (const char * transa, const DAAL_INT * m, const DAAL_INT * k, const float * alpha, const char * matdescra, const float * val, - const DAAL_INT * indx, const DAAL_INT * pntrb, const DAAL_INT * pntre, const float * x, const float * beta, float * y), - (transa, m, k, alpha, matdescra, val, indx, pntrb, pntre, x, beta, y)); - -CALL_VOID_FUNC_FROM_DLL(fpk_lapack_, dpotrf, (const char * uplo, const DAAL_INT * n, double * a, const DAAL_INT * lda, DAAL_INT * info, int iuplo), - (uplo, n, a, lda, info, iuplo)); -CALL_VOID_FUNC_FROM_DLL(fpk_lapack_, spotrf, (const char * uplo, const DAAL_INT * n, float * a, const DAAL_INT * lda, DAAL_INT * info, int iuplo), - (uplo, n, a, lda, info, iuplo)); - -CALL_VOID_FUNC_FROM_DLL(fpk_lapack_, dpotrs, - (const char * uplo, const DAAL_INT * n, const DAAL_INT * nrhs, const double * a, const DAAL_INT * lda, double * b, - const DAAL_INT * ldb, DAAL_INT * info, int iuplo), - (uplo, n, nrhs, a, lda, b, ldb, info, iuplo)); -CALL_VOID_FUNC_FROM_DLL(fpk_lapack_, spotrs, - (const char * uplo, const DAAL_INT * n, const DAAL_INT * nrhs, const float * a, const DAAL_INT * lda, float * b, - const DAAL_INT * ldb, DAAL_INT * info, int iuplo), - (uplo, n, nrhs, a, lda, b, ldb, info, iuplo)); - -CALL_VOID_FUNC_FROM_DLL(fpk_lapack_, dgetrf, - (const DAAL_INT * m, const DAAL_INT * n, const double * a, const DAAL_INT * lda, const DAAL_INT * ipiv, DAAL_INT * info), - (m, n, a, lda, ipiv, info)); -CALL_VOID_FUNC_FROM_DLL(fpk_lapack_, sgetrf, - (const DAAL_INT * m, const DAAL_INT * n, const float * a, const DAAL_INT * lda, const DAAL_INT * ipiv, DAAL_INT * info), - (m, n, a, lda, ipiv, info)); - -CALL_VOID_FUNC_FROM_DLL(fpk_lapack_, dgetrs, - (const char * trans, const DAAL_INT * n, const DAAL_INT * 
nrhs, const double * a, const DAAL_INT * lda, const DAAL_INT * ipiv, - double * b, const DAAL_INT * ldb, DAAL_INT * info, int iuplo), - (trans, n, nrhs, a, lda, ipiv, b, ldb, info, iuplo)); -CALL_VOID_FUNC_FROM_DLL(fpk_lapack_, sgetrs, - (const char * trans, const DAAL_INT * n, const DAAL_INT * nrhs, const float * a, const DAAL_INT * lda, const DAAL_INT * ipiv, - float * b, const DAAL_INT * ldb, DAAL_INT * info, int iuplo), - (trans, n, nrhs, a, lda, ipiv, b, ldb, info, iuplo)); - -CALL_VOID_FUNC_FROM_DLL(fpk_lapack_, dpotri, (const char * uplo, const DAAL_INT * n, double * a, const DAAL_INT * lda, DAAL_INT * info, int iuplo), - (uplo, n, a, lda, info, iuplo)); -CALL_VOID_FUNC_FROM_DLL(fpk_lapack_, spotri, (const char * uplo, const DAAL_INT * n, float * a, const DAAL_INT * lda, DAAL_INT * info, int iuplo), - (uplo, n, a, lda, info, iuplo)); - -CALL_VOID_FUNC_FROM_DLL(fpk_lapack_, dgerqf, - (const DAAL_INT * m, const DAAL_INT * n, double * a, const DAAL_INT * lda, double * tau, double * work, - const DAAL_INT * lwork, DAAL_INT * info), - (m, n, a, lda, tau, work, lwork, info)); -CALL_VOID_FUNC_FROM_DLL(fpk_lapack_, sgerqf, - (const DAAL_INT * m, const DAAL_INT * n, float * a, const DAAL_INT * lda, float * tau, float * work, const DAAL_INT * lwork, - DAAL_INT * info), - (m, n, a, lda, tau, work, lwork, info)); - -CALL_VOID_FUNC_FROM_DLL(fpk_lapack_, dormrq, - (const char * side, const char * trans, const DAAL_INT * m, const DAAL_INT * n, const DAAL_INT * k, const double * a, - const DAAL_INT * lda, const double * tau, double * c, const DAAL_INT * ldc, double * work, const DAAL_INT * lwork, - DAAL_INT * info, int iside, int itrans), - (side, trans, m, n, k, a, lda, tau, c, ldc, work, lwork, info, iside, itrans)); -CALL_VOID_FUNC_FROM_DLL(fpk_lapack_, sormrq, - (const char * side, const char * trans, const DAAL_INT * m, const DAAL_INT * n, const DAAL_INT * k, const float * a, - const DAAL_INT * lda, const float * tau, float * c, const DAAL_INT * ldc, float * work, 
const DAAL_INT * lwork, - DAAL_INT * info, int iside, int itrans), - (side, trans, m, n, k, a, lda, tau, c, ldc, work, lwork, info, iside, itrans)); - -CALL_VOID_FUNC_FROM_DLL(fpk_lapack_, dormqr, - (const char * side, const char * trans, const DAAL_INT * m, const DAAL_INT * n, const DAAL_INT * k, const double * a, - const DAAL_INT * lda, const double * tau, double * c, const DAAL_INT * ldc, double * work, const DAAL_INT * lwork, - DAAL_INT * info, int iside, int itrans), - (side, trans, m, n, k, a, lda, tau, c, ldc, work, lwork, info, iside, itrans)); -CALL_VOID_FUNC_FROM_DLL(fpk_lapack_, sormqr, - (const char * side, const char * trans, const DAAL_INT * m, const DAAL_INT * n, const DAAL_INT * k, const float * a, - const DAAL_INT * lda, const float * tau, float * c, const DAAL_INT * ldc, float * work, const DAAL_INT * lwork, - DAAL_INT * info, int iside, int itrans), - (side, trans, m, n, k, a, lda, tau, c, ldc, work, lwork, info, iside, itrans)); - -CALL_VOID_FUNC_FROM_DLL(fpk_lapack_, dtrtrs, - (const char * uplo, const char * trans, const char * diag, const DAAL_INT * n, const DAAL_INT * nrhs, const double * a, - const DAAL_INT * lda, double * b, const DAAL_INT * ldb, DAAL_INT * info, int iuplo, int itrans, int idiag), - (uplo, trans, diag, n, nrhs, a, lda, b, ldb, info, iuplo, itrans, idiag)); -CALL_VOID_FUNC_FROM_DLL(fpk_lapack_, strtrs, - (const char * uplo, const char * trans, const char * diag, const DAAL_INT * n, const DAAL_INT * nrhs, const float * a, - const DAAL_INT * lda, float * b, const DAAL_INT * ldb, DAAL_INT * info, int iuplo, int itrans, int idiag), - (uplo, trans, diag, n, nrhs, a, lda, b, ldb, info, iuplo, itrans, idiag)); - -CALL_VOID_FUNC_FROM_DLL(fpk_lapack_, dpptrf, (const char * uplo, const DAAL_INT * n, double * ap, DAAL_INT * info, int iuplo), - (uplo, n, ap, info, iuplo)); -CALL_VOID_FUNC_FROM_DLL(fpk_lapack_, spptrf, (const char * uplo, const DAAL_INT * n, float * ap, DAAL_INT * info, int iuplo), - (uplo, n, ap, info, iuplo)); - 
-CALL_VOID_FUNC_FROM_DLL(fpk_lapack_, dgeqrf, - (const DAAL_INT * m, const DAAL_INT * n, double * a, const DAAL_INT * lda, double * tau, double * work, - const DAAL_INT * lwork, DAAL_INT * info), - (m, n, a, lda, tau, work, lwork, info)); -CALL_VOID_FUNC_FROM_DLL(fpk_lapack_, sgeqrf, - (const DAAL_INT * m, const DAAL_INT * n, float * a, const DAAL_INT * lda, float * tau, float * work, const DAAL_INT * lwork, - DAAL_INT * info), - (m, n, a, lda, tau, work, lwork, info)); - -CALL_VOID_FUNC_FROM_DLL(fpk_lapack_, dgeqp3, - (const DAAL_INT * m, const DAAL_INT * n, double * a, const DAAL_INT * lda, DAAL_INT * jpvt, double * tau, double * work, - const DAAL_INT * lwork, DAAL_INT * info), - (m, n, a, lda, jpvt, tau, work, lwork, info)); -CALL_VOID_FUNC_FROM_DLL(fpk_lapack_, sgeqp3, - (const DAAL_INT * m, const DAAL_INT * n, float * a, const DAAL_INT * lda, DAAL_INT * jpvt, float * tau, float * work, - const DAAL_INT * lwork, DAAL_INT * info), - (m, n, a, lda, jpvt, tau, work, lwork, info)); - -CALL_VOID_FUNC_FROM_DLL(fpk_lapack_, dorgqr, - (const DAAL_INT * m, const DAAL_INT * n, const DAAL_INT * k, double * a, const DAAL_INT * lda, const double * tau, - double * work, const DAAL_INT * lwork, DAAL_INT * info), - (m, n, k, a, lda, tau, work, lwork, info)); -CALL_VOID_FUNC_FROM_DLL(fpk_lapack_, sorgqr, - (const DAAL_INT * m, const DAAL_INT * n, const DAAL_INT * k, float * a, const DAAL_INT * lda, const float * tau, float * work, - const DAAL_INT * lwork, DAAL_INT * info), - (m, n, k, a, lda, tau, work, lwork, info)); - -CALL_VOID_FUNC_FROM_DLL(fpk_lapack_, dgesvd, - (const char * jobu, const char * jobvt, const DAAL_INT * m, const DAAL_INT * n, double * a, const DAAL_INT * lda, double * s, - double * u, const DAAL_INT * ldu, double * vt, const DAAL_INT * ldvt, double * work, const DAAL_INT * lwork, DAAL_INT * info, - int ijobu, int ijobvt), - (jobu, jobvt, m, n, a, lda, s, u, ldu, vt, ldvt, work, lwork, info, ijobu, ijobvt)); -CALL_VOID_FUNC_FROM_DLL(fpk_lapack_, sgesvd, - 
(const char * jobu, const char * jobvt, const DAAL_INT * m, const DAAL_INT * n, float * a, const DAAL_INT * lda, float * s, - float * u, const DAAL_INT * ldu, float * vt, const DAAL_INT * ldvt, float * work, const DAAL_INT * lwork, DAAL_INT * info, - int ijobu, int ijobvt), - (jobu, jobvt, m, n, a, lda, s, u, ldu, vt, ldvt, work, lwork, info, ijobu, ijobvt)); - -CALL_VOID_FUNC_FROM_DLL(fpk_lapack_, dsyevd, - (const char * jobz, const char * uplo, const DAAL_INT * n, double * a, const DAAL_INT * lda, double * w, double * work, - const DAAL_INT * lwork, DAAL_INT * iwork, const DAAL_INT * liwork, DAAL_INT * info, int ijobz, int iuplo), - (jobz, uplo, n, a, lda, w, work, lwork, iwork, liwork, info, ijobz, iuplo)); -CALL_VOID_FUNC_FROM_DLL(fpk_lapack_, ssyevd, - (const char * jobz, const char * uplo, const DAAL_INT * n, float * a, const DAAL_INT * lda, float * w, float * work, - const DAAL_INT * lwork, DAAL_INT * iwork, const DAAL_INT * liwork, DAAL_INT * info, int ijobz, int iuplo), - (jobz, uplo, n, a, lda, w, work, lwork, iwork, liwork, info, ijobz, iuplo)); - -CALL_VOID_FUNC_FROM_DLL(fpk_lapack_, dsyev, - (const char * jobz, const char * uplo, const DAAL_INT * n, double * a, const DAAL_INT * lda, double * w, double * work, - const DAAL_INT * lwork, DAAL_INT * info, int ijobz, int iuplo), - (jobz, uplo, n, a, lda, w, work, lwork, info, ijobz, iuplo)); -CALL_VOID_FUNC_FROM_DLL(fpk_lapack_, ssyev, - (const char * jobz, const char * uplo, const DAAL_INT * n, float * a, const DAAL_INT * lda, float * w, float * work, - const DAAL_INT * lwork, DAAL_INT * info, int ijobz, int iuplo), - (jobz, uplo, n, a, lda, w, work, lwork, info, ijobz, iuplo)); - -CALL_RET_FUNC_FROM_DLL(double, fpk_blas_, xddot, - (const DAAL_INT * n, const double * x, const DAAL_INT * incx, const double * y, const DAAL_INT * incy), (n, x, incx, y, incy)); -CALL_RET_FUNC_FROM_DLL(float, fpk_blas_, xsdot, (const DAAL_INT * n, const float * x, const DAAL_INT * incx, const float * y, const DAAL_INT * incy), 
- (n, x, incx, y, incy)); - -#define CSRMM_ARGS(FPTYPE) \ - const char *transa, const DAAL_INT *m, const DAAL_INT *n, const DAAL_INT *k, const FPTYPE *alpha, const char *matdescra, const FPTYPE *val, \ - const DAAL_INT *indx, const DAAL_INT *pntrb, const DAAL_INT *pntre, const FPTYPE *b, const DAAL_INT *ldb, const FPTYPE *beta, FPTYPE *c, \ - const DAAL_INT *ldc - -CALL_VOID_FUNC_FROM_DLL(fpk_spblas_, mkl_scsrmm, (CSRMM_ARGS(float)), - (transa, m, n, k, alpha, matdescra, val, indx, pntrb, pntre, b, ldb, beta, c, ldc)); -CALL_VOID_FUNC_FROM_DLL(fpk_spblas_, mkl_dcsrmm, (CSRMM_ARGS(double)), - (transa, m, n, k, alpha, matdescra, val, indx, pntrb, pntre, b, ldb, beta, c, ldc)); - -typedef int IppStatus; -typedef unsigned char Ipp8u; -typedef unsigned short Ipp16u; -typedef unsigned int Ipp32u; -typedef signed short Ipp16s; -typedef signed int Ipp32s; -typedef float Ipp32f; -typedef double Ipp64f; - -/* Used in Intel(R) oneAPI Data Analytics Library (oneDAL) via SS */ -CALL_RET_FUNC_FROM_DLL(IppStatus, fpk_dft_, ippsSortRadixAscend_64f_I, (Ipp64f * pSrcDst, Ipp64f * pTmp, Ipp32s len), (pSrcDst, pTmp, len)); -CALL_RET_FUNC_FROM_DLL(IppStatus, fpk_dft_, ippsSortRadixAscend_32f_I, (Ipp32f * pSrcDst, Ipp32f * pTmp, Ipp32s len), (pSrcDst, pTmp, len)); - -#define CALL_VOID_FUNC_FROM_DLL_ALONE(fn_dpref, fn_name, argdecl, argcall) \ - typedef void(*fn_dpref##fn_name##_t) argdecl; \ - static fn_dpref##fn_name##_t fn_dpref##fn_name##_ptr = NULL; \ - extern "C" DAAL_EXPORT void fn_dpref##fn_name argdecl \ - { \ - load_daal_thr_dll(); \ - if (fn_dpref##fn_name##_ptr == NULL) \ - { \ - fn_dpref##fn_name##_ptr = (fn_dpref##fn_name##_t)load_daal_thr_func(#fn_dpref #fn_name); \ - } \ - fn_dpref##fn_name##_ptr argcall; \ - } - -#define CALL_RET_FUNC_FROM_DLL_ALONE(ret_type, fn_dpref, fn_name, argdecl, argcall) \ - typedef ret_type(*fn_dpref##fn_name##_t) argdecl; \ - static fn_dpref##fn_name##_t fn_dpref##fn_name##_ptr = NULL; \ - extern "C" DAAL_EXPORT ret_type fn_dpref##fn_name 
argdecl \ - { \ - load_daal_thr_dll(); \ - if (fn_dpref##fn_name##_ptr == NULL) \ - { \ - fn_dpref##fn_name##_ptr = (fn_dpref##fn_name##_t)load_daal_thr_func(#fn_dpref #fn_name); \ - } \ - return fn_dpref##fn_name##_ptr argcall; \ - } - -CALL_VOID_FUNC_FROM_DLL_ALONE(fpk_serv_, set_num_threads, (int nth), (nth)); -CALL_RET_FUNC_FROM_DLL_ALONE(int, fpk_serv_, get_max_threads, (void), ()); -CALL_RET_FUNC_FROM_DLL_ALONE(int, fpk_serv_, set_num_threads_local, (int nth), (nth)); -CALL_RET_FUNC_FROM_DLL_ALONE(int, fpk_serv_, get_ncpus, (void), ()); -CALL_RET_FUNC_FROM_DLL_ALONE(int, fpk_serv_, get_ncorespercpu, (void), ()); -CALL_RET_FUNC_FROM_DLL_ALONE(int, fpk_serv_, get_ht, (void), ()); -CALL_RET_FUNC_FROM_DLL_ALONE(int, fpk_serv_, get_nlogicalcores, (void), ()); -CALL_RET_FUNC_FROM_DLL_ALONE(int, fpk_serv_, cpuisknm, (void), ()); -CALL_RET_FUNC_FROM_DLL_ALONE(int, fpk_serv_, enable_instructions, (int nth), (nth)); -CALL_RET_FUNC_FROM_DLL_ALONE(int, fpk_serv_, memmove_s, (void * dest, size_t dmax, const void * src, size_t smax), (dest, dmax, src, smax)); - -typedef void (*func_type)(DAAL_INT, DAAL_INT, DAAL_INT, void *); - -CALL_VOID_FUNC_FROM_DLL_ALONE(fpk_vsl_serv_, threader_for, (DAAL_INT n, DAAL_INT threads_request, void * a, func_type func), - (n, threads_request, a, func)); -CALL_VOID_FUNC_FROM_DLL_ALONE(fpk_vsl_serv_, threader_for_ordered, (DAAL_INT n, DAAL_INT threads_request, void * a, func_type func), - (n, threads_request, a, func)); -CALL_VOID_FUNC_FROM_DLL_ALONE(fpk_vsl_serv_, threader_sections, (DAAL_INT threads_request, void * a, func_type func), (threads_request, a, func)); -CALL_VOID_FUNC_FROM_DLL_ALONE(fpk_vsl_serv_, threader_ordered, (DAAL_INT i, DAAL_INT th_idx, DAAL_INT th_num, void * a, func_type func), - (i, th_idx, th_num, a, func)); -CALL_RET_FUNC_FROM_DLL_ALONE(DAAL_INT, fpk_vsl_serv_, threader_get_num_threads_limit, (void), ()); diff --git a/cpp/daal/src/externals/service_blas_mkl.h b/cpp/daal/src/externals/service_blas_mkl.h old mode 100755 
new mode 100644 index 58b505a6067..b30fd5f6ee9 --- a/cpp/daal/src/externals/service_blas_mkl.h +++ b/cpp/daal/src/externals/service_blas_mkl.h @@ -25,65 +25,11 @@ #define __SERVICE_BLAS_MKL_H__ #include "services/daal_defines.h" -#include "mkl_daal.h" +#include <mkl.h> -#if !defined(__DAAL_CONCAT4) - #define __DAAL_CONCAT4(a, b, c, d) __DAAL_CONCAT41(a, b, c, d) - #define __DAAL_CONCAT41(a, b, c, d) a##b##c##d -#endif - -#if !defined(__DAAL_CONCAT5) - #define __DAAL_CONCAT5(a, b, c, d, e) __DAAL_CONCAT51(a, b, c, d, e) - #define __DAAL_CONCAT51(a, b, c, d, e) a##b##c##d##e -#endif - -#if defined(__APPLE__) - #define __DAAL_MKL_SSE2 avx_ - #define __DAAL_MKL_SSE42 avx_ -#else - #define __DAAL_MKL_SSE2 sse2_ - #define __DAAL_MKL_SSE42 sse42_ -#endif - -#define __DAAL_MKLFN(f_cpu, f_pref, f_name) __DAAL_CONCAT4(fpk_, f_pref, f_cpu, f_name) -#define __DAAL_MKLFN_CALL(f_pref, f_name, f_args) __DAAL_MKLFN_CALL1(f_pref, f_name, f_args) -#define __DAAL_MKLFN_CALL_RETURN(f_pref, f_name, f_args) __DAAL_MKLFN_CALL2(f_pref, f_name, f_args) - -#define __DAAL_MKLFN_CALL1(f_pref, f_name, f_args) \ - if (avx512 == cpu) \ - { \ - __DAAL_MKLFN(avx512_, f_pref, f_name) f_args; \ - } \ - if (avx2 == cpu) \ - { \ - __DAAL_MKLFN(avx2_, f_pref, f_name) f_args; \ - } \ - if (sse42 == cpu) \ - { \ - __DAAL_MKLFN(__DAAL_MKL_SSE42, f_pref, f_name) f_args; \ - } \ - if (sse2 == cpu) \ - { \ - __DAAL_MKLFN(__DAAL_MKL_SSE2, f_pref, f_name) f_args; \ - } +#define __DAAL_MKLFN_CALL_BLAS(f_name, f_args) f_name f_args; -#define __DAAL_MKLFN_CALL2(f_pref, f_name, f_args) \ - if (avx512 == cpu) \ - { \ - return __DAAL_MKLFN(avx512_, f_pref, f_name) f_args; \ - } \ - if (avx2 == cpu) \ - { \ - return __DAAL_MKLFN(avx2_, f_pref, f_name) f_args; \ - } \ - if (sse42 == cpu) \ - { \ - return __DAAL_MKLFN(__DAAL_MKL_SSE42, f_pref, f_name) f_args; \ - } \ - if (sse2 == cpu) \ - { \ - return __DAAL_MKLFN(__DAAL_MKL_SSE2, f_pref, f_name) f_args; \ - } +#define __DAAL_MKLFN_CALL_RETURN_BLAS(f_name, f_args, res) res = 
f_name f_args; namespace daal { @@ -107,87 +53,96 @@ struct MklBlas static void xsyrk(char * uplo, char * trans, DAAL_INT * p, DAAL_INT * n, double * alpha, double * a, DAAL_INT * lda, double * beta, double * ata, DAAL_INT * ldata) { - __DAAL_MKLFN_CALL(blas_, dsyrk, (uplo, trans, p, n, alpha, a, lda, beta, ata, ldata)); + __DAAL_MKLFN_CALL_BLAS(dsyrk, (uplo, trans, (MKL_INT *)p, (MKL_INT *)n, alpha, a, (MKL_INT *)lda, beta, ata, (MKL_INT *)ldata)); } static void xxsyrk(char * uplo, char * trans, DAAL_INT * p, DAAL_INT * n, double * alpha, double * a, DAAL_INT * lda, double * beta, double * ata, DAAL_INT * ldata) { - __DAAL_MKLFN_CALL(blas_, xdsyrk, (uplo, trans, p, n, alpha, a, lda, beta, ata, ldata)); + int old_nthr = mkl_set_num_threads_local(1); + __DAAL_MKLFN_CALL_BLAS(dsyrk, (uplo, trans, (MKL_INT *)p, (MKL_INT *)n, alpha, a, (MKL_INT *)lda, beta, ata, (MKL_INT *)ldata)); + mkl_set_num_threads_local(old_nthr); } static void xsyr(const char * uplo, const DAAL_INT * n, const double * alpha, const double * x, const DAAL_INT * incx, double * a, const DAAL_INT * lda) { - __DAAL_MKLFN_CALL(blas_, dsyr, (uplo, n, alpha, x, incx, a, lda)); + __DAAL_MKLFN_CALL_BLAS(dsyr, (uplo, (MKL_INT *)n, alpha, x, (MKL_INT *)incx, a, (MKL_INT *)lda)); } static void xxsyr(const char * uplo, const DAAL_INT * n, const double * alpha, const double * x, const DAAL_INT * incx, double * a, const DAAL_INT * lda) { - int old_threads = fpk_serv_set_num_threads_local(1); - __DAAL_MKLFN_CALL(blas_, dsyr, (uplo, n, alpha, x, incx, a, lda)); - fpk_serv_set_num_threads_local(old_threads); + int old_nthr = mkl_set_num_threads_local(1); + __DAAL_MKLFN_CALL_BLAS(dsyr, (uplo, (MKL_INT *)n, alpha, x, (MKL_INT *)incx, a, (MKL_INT *)lda)); + mkl_set_num_threads_local(old_nthr); } static void xgemm(const char * transa, const char * transb, const DAAL_INT * p, const DAAL_INT * ny, const DAAL_INT * n, const double * alpha, const double * a, const DAAL_INT * lda, const double * y, const DAAL_INT * ldy, 
const double * beta, double * aty, const DAAL_INT * ldaty) { - __DAAL_MKLFN_CALL(blas_, dgemm, (transa, transb, p, ny, n, alpha, a, lda, y, ldy, beta, aty, ldaty)); + __DAAL_MKLFN_CALL_BLAS(dgemm, (transa, transb, (MKL_INT *)p, (MKL_INT *)ny, (MKL_INT *)n, alpha, a, (MKL_INT *)lda, y, (MKL_INT *)ldy, beta, + aty, (MKL_INT *)ldaty)); } static void xxgemm(const char * transa, const char * transb, const DAAL_INT * p, const DAAL_INT * ny, const DAAL_INT * n, const double * alpha, const double * a, const DAAL_INT * lda, const double * y, const DAAL_INT * ldy, const double * beta, double * aty, const DAAL_INT * ldaty) { - __DAAL_MKLFN_CALL(blas_, xdgemm, (transa, transb, p, ny, n, alpha, a, lda, y, ldy, beta, aty, ldaty)); + int old_nthr = mkl_set_num_threads_local(1); + __DAAL_MKLFN_CALL_BLAS(dgemm, (transa, transb, (MKL_INT *)p, (MKL_INT *)ny, (MKL_INT *)n, alpha, a, (MKL_INT *)lda, y, (MKL_INT *)ldy, beta, + aty, (MKL_INT *)ldaty)); + mkl_set_num_threads_local(old_nthr); } static void xsymm(const char * side, const char * uplo, const DAAL_INT * m, const DAAL_INT * n, const double * alpha, const double * a, const DAAL_INT * lda, const double * b, const DAAL_INT * ldb, const double * beta, double * c, const DAAL_INT * ldc) { - __DAAL_MKLFN_CALL(blas_, dsymm, (side, uplo, m, n, alpha, a, lda, b, ldb, beta, c, ldc)); + __DAAL_MKLFN_CALL_BLAS(dsymm, (side, uplo, (MKL_INT *)m, (MKL_INT *)n, alpha, a, (MKL_INT *)lda, b, (MKL_INT *)ldb, beta, c, (MKL_INT *)ldc)); } static void xxsymm(char * side, char * uplo, DAAL_INT * m, DAAL_INT * n, double * alpha, double * a, DAAL_INT * lda, double * b, DAAL_INT * ldb, double * beta, double * c, DAAL_INT * ldc) { - int old_threads = fpk_serv_set_num_threads_local(1); - __DAAL_MKLFN_CALL(blas_, dsymm, (side, uplo, m, n, alpha, a, lda, b, ldb, beta, c, ldc)); - fpk_serv_set_num_threads_local(old_threads); + int old_nthr = mkl_set_num_threads_local(1); + __DAAL_MKLFN_CALL_BLAS(dsymm, (side, uplo, (MKL_INT *)m, (MKL_INT *)n, alpha, a, 
(MKL_INT *)lda, b, (MKL_INT *)ldb, beta, c, (MKL_INT *)ldc)); + mkl_set_num_threads_local(old_nthr); } static void xgemv(const char * trans, const DAAL_INT * m, const DAAL_INT * n, const double * alpha, const double * a, const DAAL_INT * lda, const double * x, const DAAL_INT * incx, const double * beta, double * y, const DAAL_INT * incy) { - __DAAL_MKLFN_CALL(blas_, dgemv, (trans, m, n, alpha, a, lda, x, incx, beta, y, incy)); + __DAAL_MKLFN_CALL_BLAS(dgemv, (trans, (MKL_INT *)m, (MKL_INT *)n, alpha, a, (MKL_INT *)lda, x, (MKL_INT *)incx, beta, y, (MKL_INT *)incy)); } static void xxgemv(const char * trans, const DAAL_INT * m, const DAAL_INT * n, const double * alpha, const double * a, const DAAL_INT * lda, const double * x, const DAAL_INT * incx, const double * beta, double * y, const DAAL_INT * incy) { - int old_threads = fpk_serv_set_num_threads_local(1); - __DAAL_MKLFN_CALL(blas_, dgemv, (trans, m, n, alpha, a, lda, x, incx, beta, y, incy)); - fpk_serv_set_num_threads_local(old_threads); + int old_nthr = mkl_set_num_threads_local(1); + __DAAL_MKLFN_CALL_BLAS(dgemv, (trans, (MKL_INT *)m, (MKL_INT *)n, alpha, a, (MKL_INT *)lda, x, (MKL_INT *)incx, beta, y, (MKL_INT *)incy)); + mkl_set_num_threads_local(old_nthr); } static void xaxpy(DAAL_INT * n, double * a, double * x, DAAL_INT * incx, double * y, DAAL_INT * incy) { - __DAAL_MKLFN_CALL(blas_, daxpy, (n, a, x, incx, y, incy)); + __DAAL_MKLFN_CALL_BLAS(daxpy, ((MKL_INT *)n, a, x, (MKL_INT *)incx, y, (MKL_INT *)incy)); } static void xxaxpy(const DAAL_INT * n, const double * a, const double * x, const DAAL_INT * incx, double * y, const DAAL_INT * incy) { - int old_threads = fpk_serv_set_num_threads_local(1); - __DAAL_MKLFN_CALL(blas_, daxpy, (n, a, x, incx, y, incy)); - fpk_serv_set_num_threads_local(old_threads); + int old_nthr = mkl_set_num_threads_local(1); + __DAAL_MKLFN_CALL_BLAS(daxpy, ((MKL_INT *)n, a, x, (MKL_INT *)incx, y, (MKL_INT *)incy)); + mkl_set_num_threads_local(old_nthr); } static double xxdot(const 
DAAL_INT * n, const double * x, const DAAL_INT * incx, const double * y, const DAAL_INT * incy) { - __DAAL_MKLFN_CALL_RETURN(blas_, xddot, (n, x, incx, y, incy)); - return 0; + int old_nthr = mkl_set_num_threads_local(1); + double res; + __DAAL_MKLFN_CALL_RETURN_BLAS(ddot, ((MKL_INT *)n, x, (MKL_INT *)incx, y, (MKL_INT *)incy), res); + mkl_set_num_threads_local(old_nthr); + return res; } }; @@ -203,87 +158,96 @@ struct MklBlas static void xsyrk(char * uplo, char * trans, DAAL_INT * p, DAAL_INT * n, float * alpha, float * a, DAAL_INT * lda, float * beta, float * ata, DAAL_INT * ldata) { - __DAAL_MKLFN_CALL(blas_, ssyrk, (uplo, trans, p, n, alpha, a, lda, beta, ata, ldata)); + __DAAL_MKLFN_CALL_BLAS(ssyrk, (uplo, trans, (MKL_INT *)p, (MKL_INT *)n, alpha, a, (MKL_INT *)lda, beta, ata, (MKL_INT *)ldata)); } static void xxsyrk(char * uplo, char * trans, DAAL_INT * p, DAAL_INT * n, float * alpha, float * a, DAAL_INT * lda, float * beta, float * ata, DAAL_INT * ldata) { - __DAAL_MKLFN_CALL(blas_, xssyrk, (uplo, trans, p, n, alpha, a, lda, beta, ata, ldata)); + int old_nthr = mkl_set_num_threads_local(1); + __DAAL_MKLFN_CALL_BLAS(ssyrk, (uplo, trans, (MKL_INT *)p, (MKL_INT *)n, alpha, a, (MKL_INT *)lda, beta, ata, (MKL_INT *)ldata)); + mkl_set_num_threads_local(old_nthr); } static void xsyr(const char * uplo, const DAAL_INT * n, const float * alpha, const float * x, const DAAL_INT * incx, float * a, const DAAL_INT * lda) { - __DAAL_MKLFN_CALL(blas_, ssyr, (uplo, n, alpha, x, incx, a, lda)); + __DAAL_MKLFN_CALL_BLAS(ssyr, (uplo, (MKL_INT *)n, alpha, x, (MKL_INT *)incx, a, (MKL_INT *)lda)); } static void xxsyr(const char * uplo, const DAAL_INT * n, const float * alpha, const float * x, const DAAL_INT * incx, float * a, const DAAL_INT * lda) { - int old_threads = fpk_serv_set_num_threads_local(1); - __DAAL_MKLFN_CALL(blas_, ssyr, (uplo, n, alpha, x, incx, a, lda)); - fpk_serv_set_num_threads_local(old_threads); + int old_nthr = mkl_set_num_threads_local(1); + 
__DAAL_MKLFN_CALL_BLAS(ssyr, (uplo, (MKL_INT *)n, alpha, x, (MKL_INT *)incx, a, (MKL_INT *)lda)); + mkl_set_num_threads_local(old_nthr); } static void xgemm(const char * transa, const char * transb, const DAAL_INT * p, const DAAL_INT * ny, const DAAL_INT * n, const float * alpha, const float * a, const DAAL_INT * lda, const float * y, const DAAL_INT * ldy, const float * beta, float * aty, const DAAL_INT * ldaty) { - __DAAL_MKLFN_CALL(blas_, sgemm, (transa, transb, p, ny, n, alpha, a, lda, y, ldy, beta, aty, ldaty)); + __DAAL_MKLFN_CALL_BLAS(sgemm, (transa, transb, (MKL_INT *)p, (MKL_INT *)ny, (MKL_INT *)n, alpha, a, (MKL_INT *)lda, y, (MKL_INT *)ldy, beta, + aty, (MKL_INT *)ldaty)); } static void xxgemm(const char * transa, const char * transb, const DAAL_INT * p, const DAAL_INT * ny, const DAAL_INT * n, const float * alpha, const float * a, const DAAL_INT * lda, const float * y, const DAAL_INT * ldy, const float * beta, float * aty, const DAAL_INT * ldaty) { - __DAAL_MKLFN_CALL(blas_, xsgemm, (transa, transb, p, ny, n, alpha, a, lda, y, ldy, beta, aty, ldaty)); + int old_nthr = mkl_set_num_threads_local(1); + __DAAL_MKLFN_CALL_BLAS(sgemm, (transa, transb, (MKL_INT *)p, (MKL_INT *)ny, (MKL_INT *)n, alpha, a, (MKL_INT *)lda, y, (MKL_INT *)ldy, beta, + aty, (MKL_INT *)ldaty)); + mkl_set_num_threads_local(old_nthr); } static void xsymm(const char * side, const char * uplo, const DAAL_INT * m, const DAAL_INT * n, const float * alpha, const float * a, const DAAL_INT * lda, const float * b, const DAAL_INT * ldb, const float * beta, float * c, const DAAL_INT * ldc) { - __DAAL_MKLFN_CALL(blas_, ssymm, (side, uplo, m, n, alpha, a, lda, b, ldb, beta, c, ldc)); + __DAAL_MKLFN_CALL_BLAS(ssymm, (side, uplo, (MKL_INT *)m, (MKL_INT *)n, alpha, a, (MKL_INT *)lda, b, (MKL_INT *)ldb, beta, c, (MKL_INT *)ldc)); } static void xxsymm(char * side, char * uplo, DAAL_INT * m, DAAL_INT * n, float * alpha, float * a, DAAL_INT * lda, float * b, DAAL_INT * ldb, float * beta, float * c, 
DAAL_INT * ldc) { - int old_threads = fpk_serv_set_num_threads_local(1); - __DAAL_MKLFN_CALL(blas_, ssymm, (side, uplo, m, n, alpha, a, lda, b, ldb, beta, c, ldc)); - fpk_serv_set_num_threads_local(old_threads); + int old_nthr = mkl_set_num_threads_local(1); + __DAAL_MKLFN_CALL_BLAS(ssymm, (side, uplo, (MKL_INT *)m, (MKL_INT *)n, alpha, a, (MKL_INT *)lda, b, (MKL_INT *)ldb, beta, c, (MKL_INT *)ldc)); + mkl_set_num_threads_local(old_nthr); } static void xgemv(const char * trans, const DAAL_INT * m, const DAAL_INT * n, const float * alpha, const float * a, const DAAL_INT * lda, const float * x, const DAAL_INT * incx, const float * beta, float * y, const DAAL_INT * incy) { - __DAAL_MKLFN_CALL(blas_, sgemv, (trans, m, n, alpha, a, lda, x, incx, beta, y, incy)); + __DAAL_MKLFN_CALL_BLAS(sgemv, (trans, (MKL_INT *)m, (MKL_INT *)n, alpha, a, (MKL_INT *)lda, x, (MKL_INT *)incx, beta, y, (MKL_INT *)incy)); } static void xxgemv(const char * trans, const DAAL_INT * m, const DAAL_INT * n, const float * alpha, const float * a, const DAAL_INT * lda, const float * x, const DAAL_INT * incx, const float * beta, float * y, const DAAL_INT * incy) { - int old_threads = fpk_serv_set_num_threads_local(1); - __DAAL_MKLFN_CALL(blas_, sgemv, (trans, m, n, alpha, a, lda, x, incx, beta, y, incy)); - fpk_serv_set_num_threads_local(old_threads); + int old_nthr = mkl_set_num_threads_local(1); + __DAAL_MKLFN_CALL_BLAS(sgemv, (trans, (MKL_INT *)m, (MKL_INT *)n, alpha, a, (MKL_INT *)lda, x, (MKL_INT *)incx, beta, y, (MKL_INT *)incy)); + mkl_set_num_threads_local(old_nthr); } static void xaxpy(DAAL_INT * n, float * a, float * x, DAAL_INT * incx, float * y, DAAL_INT * incy) { - __DAAL_MKLFN_CALL(blas_, saxpy, (n, a, x, incx, y, incy)); + __DAAL_MKLFN_CALL_BLAS(saxpy, ((MKL_INT *)n, a, x, (MKL_INT *)incx, y, (MKL_INT *)incy)); } static void xxaxpy(const DAAL_INT * n, const float * a, const float * x, const DAAL_INT * incx, float * y, const DAAL_INT * incy) { - int old_threads = 
fpk_serv_set_num_threads_local(1); - __DAAL_MKLFN_CALL(blas_, saxpy, (n, a, x, incx, y, incy)); - fpk_serv_set_num_threads_local(old_threads); + int old_nthr = mkl_set_num_threads_local(1); + __DAAL_MKLFN_CALL_BLAS(saxpy, ((MKL_INT *)n, a, x, (MKL_INT *)incx, y, (MKL_INT *)incy)); + mkl_set_num_threads_local(old_nthr); } static float xxdot(const DAAL_INT * n, const float * x, const DAAL_INT * incx, const float * y, const DAAL_INT * incy) { - __DAAL_MKLFN_CALL_RETURN(blas_, xsdot, (n, x, incx, y, incy)); - return 0; + int old_nthr = mkl_set_num_threads_local(1); + float res; + __DAAL_MKLFN_CALL_RETURN_BLAS(sdot, ((MKL_INT *)n, x, (MKL_INT *)incx, y, (MKL_INT *)incy), res); + mkl_set_num_threads_local(old_nthr); + return res; } }; diff --git a/cpp/daal/src/externals/service_lapack_mkl.h b/cpp/daal/src/externals/service_lapack_mkl.h index 6bcbef317bc..37a81c3262f 100644 --- a/cpp/daal/src/externals/service_lapack_mkl.h +++ b/cpp/daal/src/externals/service_lapack_mkl.h @@ -25,65 +25,11 @@ #define __SERVICE_LAPACK_MKL_H__ #include "services/daal_defines.h" -#include "mkl_daal.h" +#include <mkl.h> -#if !defined(__DAAL_CONCAT4) - #define __DAAL_CONCAT4(a, b, c, d) __DAAL_CONCAT41(a, b, c, d) - #define __DAAL_CONCAT41(a, b, c, d) a##b##c##d -#endif - -#if !defined(__DAAL_CONCAT5) - #define __DAAL_CONCAT5(a, b, c, d, e) __DAAL_CONCAT51(a, b, c, d, e) - #define __DAAL_CONCAT51(a, b, c, d, e) a##b##c##d##e -#endif - -#if defined(__APPLE__) - #define __DAAL_MKL_SSE2 avx_ - #define __DAAL_MKL_SSE42 avx_ -#else - #define __DAAL_MKL_SSE2 sse2_ - #define __DAAL_MKL_SSE42 sse42_ -#endif +#define __DAAL_MKLFN_CALL_LAPACK(f_name, f_args) f_name f_args; -#define __DAAL_MKLFN(f_cpu, f_pref, f_name) __DAAL_CONCAT4(fpk_, f_pref, f_cpu, f_name) -#define __DAAL_MKLFN_CALL(f_pref, f_name, f_args) __DAAL_MKLFN_CALL1(f_pref, f_name, f_args) -#define __DAAL_MKLFN_CALL_RETURN(f_pref, f_name, f_args) __DAAL_MKLFN_CALL2(f_pref, f_name, f_args) - -#define __DAAL_MKLFN_CALL1(f_pref, f_name, f_args) \ - if 
(avx512 == cpu) \ - { \ - __DAAL_MKLFN(avx512_, f_pref, f_name) f_args; \ - } \ - if (avx2 == cpu) \ - { \ - __DAAL_MKLFN(avx2_, f_pref, f_name) f_args; \ - } \ - if (sse42 == cpu) \ - { \ - __DAAL_MKLFN(__DAAL_MKL_SSE42, f_pref, f_name) f_args; \ - } \ - if (sse2 == cpu) \ - { \ - __DAAL_MKLFN(__DAAL_MKL_SSE2, f_pref, f_name) f_args; \ - } - -#define __DAAL_MKLFN_CALL2(f_pref, f_name, f_args) \ - if (avx512 == cpu) \ - { \ - return __DAAL_MKLFN(avx512_, f_pref, f_name) f_args; \ - } \ - if (avx2 == cpu) \ - { \ - return __DAAL_MKLFN(avx2_, f_pref, f_name) f_args; \ - } \ - if (sse42 == cpu) \ - { \ - return __DAAL_MKLFN(__DAAL_MKL_SSE42, f_pref, f_name) f_args; \ - } \ - if (sse2 == cpu) \ - { \ - return __DAAL_MKLFN(__DAAL_MKL_SSE2, f_pref, f_name) f_args; \ - } +#define __DAAL_MKLFN_CALL_RETURN_LAPACK(f_name, f_args) return f_name f_args; namespace daal { @@ -106,194 +52,211 @@ struct MklLapack static void xgetrf(DAAL_INT * m, DAAL_INT * n, double * a, DAAL_INT * lda, DAAL_INT * ipiv, DAAL_INT * info) { - __DAAL_MKLFN_CALL(lapack_, dgetrf, (m, n, a, lda, ipiv, info)); + __DAAL_MKLFN_CALL_LAPACK(dgetrf, ((MKL_INT *)m, (MKL_INT *)n, a, (MKL_INT *)lda, (MKL_INT *)ipiv, (MKL_INT *)info)); } static void xxgetrf(DAAL_INT * m, DAAL_INT * n, double * a, DAAL_INT * lda, DAAL_INT * ipiv, DAAL_INT * info) { - int old_threads = fpk_serv_set_num_threads_local(1); - __DAAL_MKLFN_CALL(lapack_, dgetrf, (m, n, a, lda, ipiv, info)); - fpk_serv_set_num_threads_local(old_threads); + int old_nthr = mkl_set_num_threads_local(1); + __DAAL_MKLFN_CALL_LAPACK(dgetrf, ((MKL_INT *)m, (MKL_INT *)n, a, (MKL_INT *)lda, (MKL_INT *)ipiv, (MKL_INT *)info)); + mkl_set_num_threads_local(old_nthr); } static void xgetrs(char * trans, DAAL_INT * n, DAAL_INT * nrhs, double * a, DAAL_INT * lda, DAAL_INT * ipiv, double * b, DAAL_INT * ldb, DAAL_INT * info) { - __DAAL_MKLFN_CALL(lapack_, dgetrs, (trans, n, nrhs, a, lda, ipiv, b, ldb, info, 1)); + __DAAL_MKLFN_CALL_LAPACK(dgetrs, + (trans, (MKL_INT *)n, 
(MKL_INT *)nrhs, a, (MKL_INT *)lda, (MKL_INT *)ipiv, b, (MKL_INT *)ldb, (MKL_INT *)info)); } static void xxgetrs(char * trans, DAAL_INT * n, DAAL_INT * nrhs, double * a, DAAL_INT * lda, DAAL_INT * ipiv, double * b, DAAL_INT * ldb, DAAL_INT * info) { - int old_threads = fpk_serv_set_num_threads_local(1); - __DAAL_MKLFN_CALL(lapack_, dgetrs, (trans, n, nrhs, a, lda, ipiv, b, ldb, info, 1)); - fpk_serv_set_num_threads_local(old_threads); + int old_nthr = mkl_set_num_threads_local(1); + __DAAL_MKLFN_CALL_LAPACK(dgetrs, + (trans, (MKL_INT *)n, (MKL_INT *)nrhs, a, (MKL_INT *)lda, (MKL_INT *)ipiv, b, (MKL_INT *)ldb, (MKL_INT *)info)); + mkl_set_num_threads_local(old_nthr); } static void xpotrf(char * uplo, DAAL_INT * p, double * ata, DAAL_INT * ldata, DAAL_INT * info) { - __DAAL_MKLFN_CALL(lapack_, dpotrf, (uplo, p, ata, ldata, info, 1)); + __DAAL_MKLFN_CALL_LAPACK(dpotrf, (uplo, (MKL_INT *)p, ata, (MKL_INT *)ldata, (MKL_INT *)info)); } static void xxpotrf(char * uplo, DAAL_INT * p, double * ata, DAAL_INT * ldata, DAAL_INT * info) { - int old_threads = fpk_serv_set_num_threads_local(1); - __DAAL_MKLFN_CALL(lapack_, dpotrf, (uplo, p, ata, ldata, info, 1)); - fpk_serv_set_num_threads_local(old_threads); + int old_nthr = mkl_set_num_threads_local(1); + __DAAL_MKLFN_CALL_LAPACK(dpotrf, (uplo, (MKL_INT *)p, ata, (MKL_INT *)ldata, (MKL_INT *)info)); + mkl_set_num_threads_local(old_nthr); } static void xpotrs(char * uplo, DAAL_INT * p, DAAL_INT * ny, double * ata, DAAL_INT * ldata, double * beta, DAAL_INT * ldaty, DAAL_INT * info) { - __DAAL_MKLFN_CALL(lapack_, dpotrs, (uplo, p, ny, ata, ldata, beta, ldaty, info, 1)); + __DAAL_MKLFN_CALL_LAPACK(dpotrs, (uplo, (MKL_INT *)p, (MKL_INT *)ny, ata, (MKL_INT *)ldata, beta, (MKL_INT *)ldaty, (MKL_INT *)info)); } static void xxpotrs(char * uplo, DAAL_INT * p, DAAL_INT * ny, double * ata, DAAL_INT * ldata, double * beta, DAAL_INT * ldaty, DAAL_INT * info) { - int old_threads = fpk_serv_set_num_threads_local(1); - 
__DAAL_MKLFN_CALL(lapack_, dpotrs, (uplo, p, ny, ata, ldata, beta, ldaty, info, 1)); - fpk_serv_set_num_threads_local(old_threads); + int old_nthr = mkl_set_num_threads_local(1); + __DAAL_MKLFN_CALL_LAPACK(dpotrs, (uplo, (MKL_INT *)p, (MKL_INT *)ny, ata, (MKL_INT *)ldata, beta, (MKL_INT *)ldaty, (MKL_INT *)info)); + mkl_set_num_threads_local(old_nthr); } static void xpotri(char * uplo, DAAL_INT * p, double * ata, DAAL_INT * ldata, DAAL_INT * info) { - __DAAL_MKLFN_CALL(lapack_, dpotri, (uplo, p, ata, ldata, info, 1)); + __DAAL_MKLFN_CALL_LAPACK(dpotri, (uplo, (MKL_INT *)p, ata, (MKL_INT *)ldata, (MKL_INT *)info)); } static void xxpotri(char * uplo, DAAL_INT * p, double * ata, DAAL_INT * ldata, DAAL_INT * info) { - int old_threads = fpk_serv_set_num_threads_local(1); - __DAAL_MKLFN_CALL(lapack_, dpotri, (uplo, p, ata, ldata, info, 1)); - fpk_serv_set_num_threads_local(old_threads); + int old_nthr = mkl_set_num_threads_local(1); + __DAAL_MKLFN_CALL_LAPACK(dpotri, (uplo, (MKL_INT *)p, ata, (MKL_INT *)ldata, (MKL_INT *)info)); + mkl_set_num_threads_local(old_nthr); } static void xgerqf(DAAL_INT * m, DAAL_INT * n, double * a, DAAL_INT * lda, double * tau, double * work, DAAL_INT * lwork, DAAL_INT * info) { - __DAAL_MKLFN_CALL(lapack_, dgerqf, (m, n, a, lda, tau, work, lwork, info)); + __DAAL_MKLFN_CALL_LAPACK(dgerqf, ((MKL_INT *)m, (MKL_INT *)n, a, (MKL_INT *)lda, tau, work, (MKL_INT *)lwork, (MKL_INT *)info)); } static void xxgerqf(DAAL_INT * m, DAAL_INT * n, double * a, DAAL_INT * lda, double * tau, double * work, DAAL_INT * lwork, DAAL_INT * info) { - int old_threads = fpk_serv_set_num_threads_local(1); - __DAAL_MKLFN_CALL(lapack_, dgerqf, (m, n, a, lda, tau, work, lwork, info)); - fpk_serv_set_num_threads_local(old_threads); + int old_nthr = mkl_set_num_threads_local(1); + __DAAL_MKLFN_CALL_LAPACK(dgerqf, ((MKL_INT *)m, (MKL_INT *)n, a, (MKL_INT *)lda, tau, work, (MKL_INT *)lwork, (MKL_INT *)info)); + mkl_set_num_threads_local(old_nthr); } static void xormrq(char * 
side, char * trans, DAAL_INT * m, DAAL_INT * n, DAAL_INT * k, double * a, DAAL_INT * lda, double * tau, double * c, DAAL_INT * ldc, double * work, DAAL_INT * lwork, DAAL_INT * info) { - __DAAL_MKLFN_CALL(lapack_, dormrq, (side, trans, m, n, k, a, lda, tau, c, ldc, work, lwork, info, 1, 1)); + __DAAL_MKLFN_CALL_LAPACK(dormrq, (side, trans, (MKL_INT *)m, (MKL_INT *)n, (MKL_INT *)k, a, (MKL_INT *)lda, tau, c, (MKL_INT *)ldc, work, + (MKL_INT *)lwork, (MKL_INT *)info)); } static void xxormrq(char * side, char * trans, DAAL_INT * m, DAAL_INT * n, DAAL_INT * k, double * a, DAAL_INT * lda, double * tau, double * c, DAAL_INT * ldc, double * work, DAAL_INT * lwork, DAAL_INT * info) { - int old_threads = fpk_serv_set_num_threads_local(1); - __DAAL_MKLFN_CALL(lapack_, dormrq, (side, trans, m, n, k, a, lda, tau, c, ldc, work, lwork, info, 1, 1)); - fpk_serv_set_num_threads_local(old_threads); + int old_nthr = mkl_set_num_threads_local(1); + __DAAL_MKLFN_CALL_LAPACK(dormrq, (side, trans, (MKL_INT *)m, (MKL_INT *)n, (MKL_INT *)k, a, (MKL_INT *)lda, tau, c, (MKL_INT *)ldc, work, + (MKL_INT *)lwork, (MKL_INT *)info)); + mkl_set_num_threads_local(old_nthr); } static void xtrtrs(char * uplo, char * trans, char * diag, DAAL_INT * n, DAAL_INT * nrhs, double * a, DAAL_INT * lda, double * b, DAAL_INT * ldb, DAAL_INT * info) { - __DAAL_MKLFN_CALL(lapack_, dtrtrs, (uplo, trans, diag, n, nrhs, a, lda, b, ldb, info, 1, 1, 1)); + __DAAL_MKLFN_CALL_LAPACK(dtrtrs, (uplo, trans, diag, (MKL_INT *)n, (MKL_INT *)nrhs, a, (MKL_INT *)lda, b, (MKL_INT *)ldb, (MKL_INT *)info)); } static void xxtrtrs(char * uplo, char * trans, char * diag, DAAL_INT * n, DAAL_INT * nrhs, double * a, DAAL_INT * lda, double * b, DAAL_INT * ldb, DAAL_INT * info) { - int old_threads = fpk_serv_set_num_threads_local(1); - __DAAL_MKLFN_CALL(lapack_, dtrtrs, (uplo, trans, diag, n, nrhs, a, lda, b, ldb, info, 1, 1, 1)); - fpk_serv_set_num_threads_local(old_threads); + int old_nthr = mkl_set_num_threads_local(1); + 
__DAAL_MKLFN_CALL_LAPACK(dtrtrs, (uplo, trans, diag, (MKL_INT *)n, (MKL_INT *)nrhs, a, (MKL_INT *)lda, b, (MKL_INT *)ldb, (MKL_INT *)info)); + mkl_set_num_threads_local(old_nthr); } - static void xpptrf(char * uplo, DAAL_INT * n, double * ap, DAAL_INT * info) { __DAAL_MKLFN_CALL(lapack_, dpptrf, (uplo, n, ap, info, 1)); } + static void xpptrf(char * uplo, DAAL_INT * n, double * ap, DAAL_INT * info) + { + __DAAL_MKLFN_CALL_LAPACK(dpptrf, (uplo, (MKL_INT *)n, ap, (MKL_INT *)info)); + } static void xxpptrf(char * uplo, DAAL_INT * n, double * ap, DAAL_INT * info) { - int old_threads = fpk_serv_set_num_threads_local(1); - __DAAL_MKLFN_CALL(lapack_, dpptrf, (uplo, n, ap, info, 1)); - fpk_serv_set_num_threads_local(old_threads); + int old_nthr = mkl_set_num_threads_local(1); + __DAAL_MKLFN_CALL_LAPACK(dpptrf, (uplo, (MKL_INT *)n, ap, (MKL_INT *)info)); + mkl_set_num_threads_local(old_nthr); } static void xgeqrf(DAAL_INT m, DAAL_INT n, double * a, DAAL_INT lda, double * tau, double * work, DAAL_INT lwork, DAAL_INT * info) { - __DAAL_MKLFN_CALL(lapack_, dgeqrf, (&m, &n, a, &lda, tau, work, &lwork, info)); + __DAAL_MKLFN_CALL_LAPACK(dgeqrf, ((MKL_INT *)(&m), (MKL_INT *)(&n), a, (MKL_INT *)(&lda), tau, work, (MKL_INT *)(&lwork), (MKL_INT *)info)); } static void xxgeqrf(DAAL_INT m, DAAL_INT n, double * a, DAAL_INT lda, double * tau, double * work, DAAL_INT lwork, DAAL_INT * info) { - int old_threads = fpk_serv_set_num_threads_local(1); - __DAAL_MKLFN_CALL(lapack_, dgeqrf, (&m, &n, a, &lda, tau, work, &lwork, info)); - fpk_serv_set_num_threads_local(old_threads); + int old_nthr = mkl_set_num_threads_local(1); + __DAAL_MKLFN_CALL_LAPACK(dgeqrf, ((MKL_INT *)(&m), (MKL_INT *)(&n), a, (MKL_INT *)(&lda), tau, work, (MKL_INT *)(&lwork), (MKL_INT *)info)); + mkl_set_num_threads_local(old_nthr); } static void xgeqp3(const DAAL_INT m, const DAAL_INT n, double * a, const DAAL_INT lda, DAAL_INT * jpvt, double * tau, double * work, const DAAL_INT lwork, DAAL_INT * info) { - 
__DAAL_MKLFN_CALL(lapack_, dgeqp3, (&m, &n, a, &lda, jpvt, tau, work, &lwork, info)); + __DAAL_MKLFN_CALL_LAPACK( + dgeqp3, ((MKL_INT *)(&m), (MKL_INT *)(&n), a, (MKL_INT *)(&lda), (MKL_INT *)jpvt, tau, work, (MKL_INT *)(&lwork), (MKL_INT *)info)); } static void xxgeqp3(DAAL_INT m, DAAL_INT n, double * a, DAAL_INT lda, DAAL_INT * jpvt, double * tau, double * work, DAAL_INT lwork, DAAL_INT * info) { - int old_threads = fpk_serv_set_num_threads_local(1); - __DAAL_MKLFN_CALL(lapack_, dgeqp3, (&m, &n, a, &lda, jpvt, tau, work, &lwork, info)); - fpk_serv_set_num_threads_local(old_threads); + int old_nthr = mkl_set_num_threads_local(1); + __DAAL_MKLFN_CALL_LAPACK( + dgeqp3, ((MKL_INT *)(&m), (MKL_INT *)(&n), a, (MKL_INT *)(&lda), (MKL_INT *)jpvt, tau, work, (MKL_INT *)(&lwork), (MKL_INT *)info)); + mkl_set_num_threads_local(old_nthr); } static void xorgqr(const DAAL_INT m, const DAAL_INT n, const DAAL_INT k, double * a, const DAAL_INT lda, const double * tau, double * work, const DAAL_INT lwork, DAAL_INT * info) { - __DAAL_MKLFN_CALL(lapack_, dorgqr, (&m, &n, &k, a, &lda, tau, work, &lwork, info)); + __DAAL_MKLFN_CALL_LAPACK( + dorgqr, ((MKL_INT *)(&m), (MKL_INT *)(&n), (MKL_INT *)(&k), a, (MKL_INT *)(&lda), tau, work, (MKL_INT *)(&lwork), (MKL_INT *)info)); } static void xxorgqr(DAAL_INT m, DAAL_INT n, DAAL_INT k, double * a, DAAL_INT lda, double * tau, double * work, DAAL_INT lwork, DAAL_INT * info) { - int old_threads = fpk_serv_set_num_threads_local(1); - __DAAL_MKLFN_CALL(lapack_, dorgqr, (&m, &n, &k, a, &lda, tau, work, &lwork, info)); - fpk_serv_set_num_threads_local(old_threads); + int old_nthr = mkl_set_num_threads_local(1); + __DAAL_MKLFN_CALL_LAPACK( + dorgqr, ((MKL_INT *)(&m), (MKL_INT *)(&n), (MKL_INT *)(&k), a, (MKL_INT *)(&lda), tau, work, (MKL_INT *)(&lwork), (MKL_INT *)info)); + mkl_set_num_threads_local(old_nthr); } static void xgesvd(char jobu, char jobvt, DAAL_INT m, DAAL_INT n, double * a, DAAL_INT lda, double * s, double * u, DAAL_INT ldu, double * 
vt, DAAL_INT ldvt, double * work, DAAL_INT lwork, DAAL_INT * info) { - __DAAL_MKLFN_CALL(lapack_, dgesvd, (&jobu, &jobvt, &m, &n, a, &lda, s, u, &ldu, vt, &ldvt, work, &lwork, info, 1, 1)); + __DAAL_MKLFN_CALL_LAPACK(dgesvd, (&jobu, &jobvt, (MKL_INT *)(&m), (MKL_INT *)(&n), a, (MKL_INT *)(&lda), s, u, (MKL_INT *)(&ldu), vt, + (MKL_INT *)(&ldvt), work, (MKL_INT *)(&lwork), (MKL_INT *)info)); } static void xxgesvd(char jobu, char jobvt, DAAL_INT m, DAAL_INT n, double * a, DAAL_INT lda, double * s, double * u, DAAL_INT ldu, double * vt, DAAL_INT ldvt, double * work, DAAL_INT lwork, DAAL_INT * info) { - int old_threads = fpk_serv_set_num_threads_local(1); - __DAAL_MKLFN_CALL(lapack_, dgesvd, (&jobu, &jobvt, &m, &n, a, &lda, s, u, &ldu, vt, &ldvt, work, &lwork, info, 1, 1)); - fpk_serv_set_num_threads_local(old_threads); + int old_nthr = mkl_set_num_threads_local(1); + __DAAL_MKLFN_CALL_LAPACK(dgesvd, (&jobu, &jobvt, (MKL_INT *)(&m), (MKL_INT *)(&n), a, (MKL_INT *)(&lda), s, u, (MKL_INT *)(&ldu), vt, + (MKL_INT *)(&ldvt), work, (MKL_INT *)(&lwork), (MKL_INT *)info)); + mkl_set_num_threads_local(old_nthr); } static void xsyevd(char * jobz, char * uplo, DAAL_INT * n, double * a, DAAL_INT * lda, double * w, double * work, DAAL_INT * lwork, DAAL_INT * iwork, DAAL_INT * liwork, DAAL_INT * info) { - __DAAL_MKLFN_CALL(lapack_, dsyevd, (jobz, uplo, n, a, lda, w, work, lwork, iwork, liwork, info, 1, 1)); + __DAAL_MKLFN_CALL_LAPACK( + dsyevd, (jobz, uplo, (MKL_INT *)n, a, (MKL_INT *)lda, w, work, (MKL_INT *)lwork, (MKL_INT *)iwork, (MKL_INT *)liwork, (MKL_INT *)info)); } static void xxsyevd(char * jobz, char * uplo, DAAL_INT * n, double * a, DAAL_INT * lda, double * w, double * work, DAAL_INT * lwork, DAAL_INT * iwork, DAAL_INT * liwork, DAAL_INT * info) { - int old_threads = fpk_serv_set_num_threads_local(1); - __DAAL_MKLFN_CALL(lapack_, dsyevd, (jobz, uplo, n, a, lda, w, work, lwork, iwork, liwork, info, 1, 1)); - fpk_serv_set_num_threads_local(old_threads); + int old_nthr = 
mkl_set_num_threads_local(1); + __DAAL_MKLFN_CALL_LAPACK( + dsyevd, (jobz, uplo, (MKL_INT *)n, a, (MKL_INT *)lda, w, work, (MKL_INT *)lwork, (MKL_INT *)iwork, (MKL_INT *)liwork, (MKL_INT *)info)); + mkl_set_num_threads_local(old_nthr); } static void xormqr(char * side, char * trans, DAAL_INT * m, DAAL_INT * n, DAAL_INT * k, double * a, DAAL_INT * lda, double * tau, double * c, DAAL_INT * ldc, double * work, DAAL_INT * lwork, DAAL_INT * info) { - __DAAL_MKLFN_CALL(lapack_, dormqr, (side, trans, m, n, k, a, lda, tau, c, ldc, work, lwork, info, 1, 1)); + __DAAL_MKLFN_CALL_LAPACK(dormqr, (side, trans, (MKL_INT *)m, (MKL_INT *)n, (MKL_INT *)k, a, (MKL_INT *)lda, tau, c, (MKL_INT *)ldc, work, + (MKL_INT *)lwork, (MKL_INT *)info)); } static void xxormqr(char * side, char * trans, DAAL_INT * m, DAAL_INT * n, DAAL_INT * k, double * a, DAAL_INT * lda, double * tau, double * c, DAAL_INT * ldc, double * work, DAAL_INT * lwork, DAAL_INT * info) { - int old_threads = fpk_serv_set_num_threads_local(1); - __DAAL_MKLFN_CALL(lapack_, dormqr, (side, trans, m, n, k, a, lda, tau, c, ldc, work, lwork, info, 1, 1)); - fpk_serv_set_num_threads_local(old_threads); + int old_nthr = mkl_set_num_threads_local(1); + __DAAL_MKLFN_CALL_LAPACK(dormqr, (side, trans, (MKL_INT *)m, (MKL_INT *)n, (MKL_INT *)k, a, (MKL_INT *)lda, tau, c, (MKL_INT *)ldc, work, + (MKL_INT *)lwork, (MKL_INT *)info)); + mkl_set_num_threads_local(old_nthr); } }; @@ -308,193 +271,210 @@ struct MklLapack static void xgetrf(DAAL_INT * m, DAAL_INT * n, float * a, DAAL_INT * lda, DAAL_INT * ipiv, DAAL_INT * info) { - __DAAL_MKLFN_CALL(lapack_, sgetrf, (m, n, a, lda, ipiv, info)); + __DAAL_MKLFN_CALL_LAPACK(sgetrf, ((MKL_INT *)m, (MKL_INT *)n, a, (MKL_INT *)lda, (MKL_INT *)ipiv, (MKL_INT *)info)); } static void xxgetrf(DAAL_INT * m, DAAL_INT * n, float * a, DAAL_INT * lda, DAAL_INT * ipiv, DAAL_INT * info) { - int old_threads = fpk_serv_set_num_threads_local(1); - __DAAL_MKLFN_CALL(lapack_, sgetrf, (m, n, a, lda, ipiv, info)); - 
fpk_serv_set_num_threads_local(old_threads); + int old_nthr = mkl_set_num_threads_local(1); + __DAAL_MKLFN_CALL_LAPACK(sgetrf, ((MKL_INT *)m, (MKL_INT *)n, a, (MKL_INT *)lda, (MKL_INT *)ipiv, (MKL_INT *)info)); + mkl_set_num_threads_local(old_nthr); } static void xgetrs(char * trans, DAAL_INT * n, DAAL_INT * nrhs, float * a, DAAL_INT * lda, DAAL_INT * ipiv, float * b, DAAL_INT * ldb, DAAL_INT * info) { - __DAAL_MKLFN_CALL(lapack_, sgetrs, (trans, n, nrhs, a, lda, ipiv, b, ldb, info, 1)); + __DAAL_MKLFN_CALL_LAPACK(sgetrs, + (trans, (MKL_INT *)n, (MKL_INT *)nrhs, a, (MKL_INT *)lda, (MKL_INT *)ipiv, b, (MKL_INT *)ldb, (MKL_INT *)info)); } static void xxgetrs(char * trans, DAAL_INT * n, DAAL_INT * nrhs, float * a, DAAL_INT * lda, DAAL_INT * ipiv, float * b, DAAL_INT * ldb, DAAL_INT * info) { - int old_threads = fpk_serv_set_num_threads_local(1); - __DAAL_MKLFN_CALL(lapack_, sgetrs, (trans, n, nrhs, a, lda, ipiv, b, ldb, info, 1)); - fpk_serv_set_num_threads_local(old_threads); + int old_nthr = mkl_set_num_threads_local(1); + __DAAL_MKLFN_CALL_LAPACK(sgetrs, + (trans, (MKL_INT *)n, (MKL_INT *)nrhs, a, (MKL_INT *)lda, (MKL_INT *)ipiv, b, (MKL_INT *)ldb, (MKL_INT *)info)); + mkl_set_num_threads_local(old_nthr); } static void xpotrf(char * uplo, DAAL_INT * p, float * ata, DAAL_INT * ldata, DAAL_INT * info) { - __DAAL_MKLFN_CALL(lapack_, spotrf, (uplo, p, ata, ldata, info, 1)); + __DAAL_MKLFN_CALL_LAPACK(spotrf, (uplo, (MKL_INT *)p, ata, (MKL_INT *)ldata, (MKL_INT *)info)); } static void xxpotrf(char * uplo, DAAL_INT * p, float * ata, DAAL_INT * ldata, DAAL_INT * info) { - int old_threads = fpk_serv_set_num_threads_local(1); - __DAAL_MKLFN_CALL(lapack_, spotrf, (uplo, p, ata, ldata, info, 1)); - fpk_serv_set_num_threads_local(old_threads); + int old_nthr = mkl_set_num_threads_local(1); + __DAAL_MKLFN_CALL_LAPACK(spotrf, (uplo, (MKL_INT *)p, ata, (MKL_INT *)ldata, (MKL_INT *)info)); + mkl_set_num_threads_local(old_nthr); } static void xpotrs(char * uplo, DAAL_INT * p, 
DAAL_INT * ny, float * ata, DAAL_INT * ldata, float * beta, DAAL_INT * ldaty, DAAL_INT * info) { - __DAAL_MKLFN_CALL(lapack_, spotrs, (uplo, p, ny, ata, ldata, beta, ldaty, info, 1)); + __DAAL_MKLFN_CALL_LAPACK(spotrs, (uplo, (MKL_INT *)p, (MKL_INT *)ny, ata, (MKL_INT *)ldata, beta, (MKL_INT *)ldaty, (MKL_INT *)info)); } static void xxpotrs(char * uplo, DAAL_INT * p, DAAL_INT * ny, float * ata, DAAL_INT * ldata, float * beta, DAAL_INT * ldaty, DAAL_INT * info) { - int old_threads = fpk_serv_set_num_threads_local(1); - __DAAL_MKLFN_CALL(lapack_, spotrs, (uplo, p, ny, ata, ldata, beta, ldaty, info, 1)); - fpk_serv_set_num_threads_local(old_threads); + int old_nthr = mkl_set_num_threads_local(1); + __DAAL_MKLFN_CALL_LAPACK(spotrs, (uplo, (MKL_INT *)p, (MKL_INT *)ny, ata, (MKL_INT *)ldata, beta, (MKL_INT *)ldaty, (MKL_INT *)info)); + mkl_set_num_threads_local(old_nthr); } static void xpotri(char * uplo, DAAL_INT * p, float * ata, DAAL_INT * ldata, DAAL_INT * info) { - __DAAL_MKLFN_CALL(lapack_, spotri, (uplo, p, ata, ldata, info, 1)); + __DAAL_MKLFN_CALL_LAPACK(spotri, (uplo, (MKL_INT *)p, ata, (MKL_INT *)ldata, (MKL_INT *)info)); } static void xxpotri(char * uplo, DAAL_INT * p, float * ata, DAAL_INT * ldata, DAAL_INT * info) { - int old_threads = fpk_serv_set_num_threads_local(1); - __DAAL_MKLFN_CALL(lapack_, spotri, (uplo, p, ata, ldata, info, 1)); - fpk_serv_set_num_threads_local(old_threads); + int old_nthr = mkl_set_num_threads_local(1); + __DAAL_MKLFN_CALL_LAPACK(spotri, (uplo, (MKL_INT *)p, ata, (MKL_INT *)ldata, (MKL_INT *)info)); + mkl_set_num_threads_local(old_nthr); } static void xgerqf(DAAL_INT * m, DAAL_INT * n, float * a, DAAL_INT * lda, float * tau, float * work, DAAL_INT * lwork, DAAL_INT * info) { - __DAAL_MKLFN_CALL(lapack_, sgerqf, (m, n, a, lda, tau, work, lwork, info)); + __DAAL_MKLFN_CALL_LAPACK(sgerqf, ((MKL_INT *)m, (MKL_INT *)n, a, (MKL_INT *)lda, tau, work, (MKL_INT *)lwork, (MKL_INT *)info)); } static void xxgerqf(DAAL_INT * m, DAAL_INT * n, 
float * a, DAAL_INT * lda, float * tau, float * work, DAAL_INT * lwork, DAAL_INT * info) { - int old_threads = fpk_serv_set_num_threads_local(1); - __DAAL_MKLFN_CALL(lapack_, sgerqf, (m, n, a, lda, tau, work, lwork, info)); - fpk_serv_set_num_threads_local(old_threads); + int old_nthr = mkl_set_num_threads_local(1); + __DAAL_MKLFN_CALL_LAPACK(sgerqf, ((MKL_INT *)m, (MKL_INT *)n, a, (MKL_INT *)lda, tau, work, (MKL_INT *)lwork, (MKL_INT *)info)); + mkl_set_num_threads_local(old_nthr); } static void xormrq(char * side, char * trans, DAAL_INT * m, DAAL_INT * n, DAAL_INT * k, float * a, DAAL_INT * lda, float * tau, float * c, DAAL_INT * ldc, float * work, DAAL_INT * lwork, DAAL_INT * info) { - __DAAL_MKLFN_CALL(lapack_, sormrq, (side, trans, m, n, k, a, lda, tau, c, ldc, work, lwork, info, 1, 1)); + __DAAL_MKLFN_CALL_LAPACK(sormrq, (side, trans, (MKL_INT *)m, (MKL_INT *)n, (MKL_INT *)k, a, (MKL_INT *)lda, tau, c, (MKL_INT *)ldc, work, + (MKL_INT *)lwork, (MKL_INT *)info)); } static void xxormrq(char * side, char * trans, DAAL_INT * m, DAAL_INT * n, DAAL_INT * k, float * a, DAAL_INT * lda, float * tau, float * c, DAAL_INT * ldc, float * work, DAAL_INT * lwork, DAAL_INT * info) { - int old_threads = fpk_serv_set_num_threads_local(1); - __DAAL_MKLFN_CALL(lapack_, sormrq, (side, trans, m, n, k, a, lda, tau, c, ldc, work, lwork, info, 1, 1)); - fpk_serv_set_num_threads_local(old_threads); + int old_nthr = mkl_set_num_threads_local(1); + __DAAL_MKLFN_CALL_LAPACK(sormrq, (side, trans, (MKL_INT *)m, (MKL_INT *)n, (MKL_INT *)k, a, (MKL_INT *)lda, tau, c, (MKL_INT *)ldc, work, + (MKL_INT *)lwork, (MKL_INT *)info)); + mkl_set_num_threads_local(old_nthr); } static void xtrtrs(char * uplo, char * trans, char * diag, DAAL_INT * n, DAAL_INT * nrhs, float * a, DAAL_INT * lda, float * b, DAAL_INT * ldb, DAAL_INT * info) { - __DAAL_MKLFN_CALL(lapack_, strtrs, (uplo, trans, diag, n, nrhs, a, lda, b, ldb, info, 1, 1, 1)); + __DAAL_MKLFN_CALL_LAPACK(strtrs, (uplo, trans, diag, (MKL_INT *)n, 
(MKL_INT *)nrhs, a, (MKL_INT *)lda, b, (MKL_INT *)ldb, (MKL_INT *)info)); } static void xxtrtrs(char * uplo, char * trans, char * diag, DAAL_INT * n, DAAL_INT * nrhs, float * a, DAAL_INT * lda, float * b, DAAL_INT * ldb, DAAL_INT * info) { - int old_threads = fpk_serv_set_num_threads_local(1); - __DAAL_MKLFN_CALL(lapack_, strtrs, (uplo, trans, diag, n, nrhs, a, lda, b, ldb, info, 1, 1, 1)); - fpk_serv_set_num_threads_local(old_threads); + int old_nthr = mkl_set_num_threads_local(1); + __DAAL_MKLFN_CALL_LAPACK(strtrs, (uplo, trans, diag, (MKL_INT *)n, (MKL_INT *)nrhs, a, (MKL_INT *)lda, b, (MKL_INT *)ldb, (MKL_INT *)info)); + mkl_set_num_threads_local(old_nthr); } - static void xpptrf(char * uplo, DAAL_INT * n, float * ap, DAAL_INT * info) { __DAAL_MKLFN_CALL(lapack_, spptrf, (uplo, n, ap, info, 1)); } + static void xpptrf(char * uplo, DAAL_INT * n, float * ap, DAAL_INT * info) + { + __DAAL_MKLFN_CALL_LAPACK(spptrf, (uplo, (MKL_INT *)n, ap, (MKL_INT *)info)); + } static void xxpptrf(char * uplo, DAAL_INT * n, float * ap, DAAL_INT * info) { - int old_threads = fpk_serv_set_num_threads_local(1); - __DAAL_MKLFN_CALL(lapack_, spptrf, (uplo, n, ap, info, 1)); - fpk_serv_set_num_threads_local(old_threads); + int old_nthr = mkl_set_num_threads_local(1); + __DAAL_MKLFN_CALL_LAPACK(spptrf, (uplo, (MKL_INT *)n, ap, (MKL_INT *)info)); + mkl_set_num_threads_local(old_nthr); } static void xgeqrf(DAAL_INT m, DAAL_INT n, float * a, DAAL_INT lda, float * tau, float * work, DAAL_INT lwork, DAAL_INT * info) { - __DAAL_MKLFN_CALL(lapack_, sgeqrf, (&m, &n, a, &lda, tau, work, &lwork, info)); + __DAAL_MKLFN_CALL_LAPACK(sgeqrf, ((MKL_INT *)(&m), (MKL_INT *)(&n), a, (MKL_INT *)(&lda), tau, work, (MKL_INT *)(&lwork), (MKL_INT *)info)); } static void xxgeqrf(DAAL_INT m, DAAL_INT n, float * a, DAAL_INT lda, float * tau, float * work, DAAL_INT lwork, DAAL_INT * info) { - int old_threads = fpk_serv_set_num_threads_local(1); - __DAAL_MKLFN_CALL(lapack_, sgeqrf, (&m, &n, a, &lda, tau, work, 
&lwork, info)); - fpk_serv_set_num_threads_local(old_threads); + int old_nthr = mkl_set_num_threads_local(1); + __DAAL_MKLFN_CALL_LAPACK(sgeqrf, ((MKL_INT *)(&m), (MKL_INT *)(&n), a, (MKL_INT *)(&lda), tau, work, (MKL_INT *)(&lwork), (MKL_INT *)info)); + mkl_set_num_threads_local(old_nthr); } static void xgeqp3(const DAAL_INT m, const DAAL_INT n, float * a, const DAAL_INT lda, DAAL_INT * jpvt, float * tau, float * work, const DAAL_INT lwork, DAAL_INT * info) { - __DAAL_MKLFN_CALL(lapack_, sgeqp3, (&m, &n, a, &lda, jpvt, tau, work, &lwork, info)); + __DAAL_MKLFN_CALL_LAPACK( + sgeqp3, ((MKL_INT *)(&m), (MKL_INT *)(&n), a, (MKL_INT *)(&lda), (MKL_INT *)jpvt, tau, work, (MKL_INT *)(&lwork), (MKL_INT *)info)); } static void xxgeqp3(DAAL_INT m, DAAL_INT n, float * a, DAAL_INT lda, DAAL_INT * jpvt, float * tau, float * work, DAAL_INT lwork, DAAL_INT * info) { - int old_threads = fpk_serv_set_num_threads_local(1); - __DAAL_MKLFN_CALL(lapack_, sgeqp3, (&m, &n, a, &lda, jpvt, tau, work, &lwork, info)); - fpk_serv_set_num_threads_local(old_threads); + int old_nthr = mkl_set_num_threads_local(1); + __DAAL_MKLFN_CALL_LAPACK( + sgeqp3, ((MKL_INT *)(&m), (MKL_INT *)(&n), a, (MKL_INT *)(&lda), (MKL_INT *)jpvt, tau, work, (MKL_INT *)(&lwork), (MKL_INT *)info)); + mkl_set_num_threads_local(old_nthr); } static void xorgqr(const DAAL_INT m, const DAAL_INT n, const DAAL_INT k, float * a, const DAAL_INT lda, const float * tau, float * work, const DAAL_INT lwork, DAAL_INT * info) { - __DAAL_MKLFN_CALL(lapack_, sorgqr, (&m, &n, &k, a, &lda, tau, work, &lwork, info)); + __DAAL_MKLFN_CALL_LAPACK( + sorgqr, ((MKL_INT *)(&m), (MKL_INT *)(&n), (MKL_INT *)(&k), a, (MKL_INT *)(&lda), tau, work, (MKL_INT *)(&lwork), (MKL_INT *)info)); } static void xxorgqr(DAAL_INT m, DAAL_INT n, DAAL_INT k, float * a, DAAL_INT lda, float * tau, float * work, DAAL_INT lwork, DAAL_INT * info) { - int old_threads = fpk_serv_set_num_threads_local(1); - __DAAL_MKLFN_CALL(lapack_, sorgqr, (&m, &n, &k, a, &lda, tau, 
work, &lwork, info)); - fpk_serv_set_num_threads_local(old_threads); + int old_nthr = mkl_set_num_threads_local(1); + __DAAL_MKLFN_CALL_LAPACK( + sorgqr, ((MKL_INT *)(&m), (MKL_INT *)(&n), (MKL_INT *)(&k), a, (MKL_INT *)(&lda), tau, work, (MKL_INT *)(&lwork), (MKL_INT *)info)); + mkl_set_num_threads_local(old_nthr); } static void xgesvd(char jobu, char jobvt, DAAL_INT m, DAAL_INT n, float * a, DAAL_INT lda, float * s, float * u, DAAL_INT ldu, float * vt, DAAL_INT ldvt, float * work, DAAL_INT lwork, DAAL_INT * info) { - __DAAL_MKLFN_CALL(lapack_, sgesvd, (&jobu, &jobvt, &m, &n, a, &lda, s, u, &ldu, vt, &ldvt, work, &lwork, info, 1, 1)); + __DAAL_MKLFN_CALL_LAPACK(sgesvd, (&jobu, &jobvt, (MKL_INT *)(&m), (MKL_INT *)(&n), a, (MKL_INT *)(&lda), s, u, (MKL_INT *)(&ldu), vt, + (MKL_INT *)(&ldvt), work, (MKL_INT *)(&lwork), (MKL_INT *)info)); } static void xxgesvd(char jobu, char jobvt, DAAL_INT m, DAAL_INT n, float * a, DAAL_INT lda, float * s, float * u, DAAL_INT ldu, float * vt, DAAL_INT ldvt, float * work, DAAL_INT lwork, DAAL_INT * info) { - int old_threads = fpk_serv_set_num_threads_local(1); - __DAAL_MKLFN_CALL(lapack_, sgesvd, (&jobu, &jobvt, &m, &n, a, &lda, s, u, &ldu, vt, &ldvt, work, &lwork, info, 1, 1)); - fpk_serv_set_num_threads_local(old_threads); + int old_nthr = mkl_set_num_threads_local(1); + __DAAL_MKLFN_CALL_LAPACK(sgesvd, (&jobu, &jobvt, (MKL_INT *)(&m), (MKL_INT *)(&n), a, (MKL_INT *)(&lda), s, u, (MKL_INT *)(&ldu), vt, + (MKL_INT *)(&ldvt), work, (MKL_INT *)(&lwork), (MKL_INT *)info)); + mkl_set_num_threads_local(old_nthr); } static void xsyevd(char * jobz, char * uplo, DAAL_INT * n, float * a, DAAL_INT * lda, float * w, float * work, DAAL_INT * lwork, DAAL_INT * iwork, DAAL_INT * liwork, DAAL_INT * info) { - __DAAL_MKLFN_CALL(lapack_, ssyevd, (jobz, uplo, n, a, lda, w, work, lwork, iwork, liwork, info, 1, 1)); + __DAAL_MKLFN_CALL_LAPACK( + ssyevd, (jobz, uplo, (MKL_INT *)n, a, (MKL_INT *)lda, w, work, (MKL_INT *)lwork, (MKL_INT *)iwork, (MKL_INT 
*)liwork, (MKL_INT *)info)); } static void xxsyevd(char * jobz, char * uplo, DAAL_INT * n, float * a, DAAL_INT * lda, float * w, float * work, DAAL_INT * lwork, DAAL_INT * iwork, DAAL_INT * liwork, DAAL_INT * info) { - int old_threads = fpk_serv_set_num_threads_local(1); - __DAAL_MKLFN_CALL(lapack_, ssyevd, (jobz, uplo, n, a, lda, w, work, lwork, iwork, liwork, info, 1, 1)); - fpk_serv_set_num_threads_local(old_threads); + int old_nthr = mkl_set_num_threads_local(1); + __DAAL_MKLFN_CALL_LAPACK( + ssyevd, (jobz, uplo, (MKL_INT *)n, a, (MKL_INT *)lda, w, work, (MKL_INT *)lwork, (MKL_INT *)iwork, (MKL_INT *)liwork, (MKL_INT *)info)); + mkl_set_num_threads_local(old_nthr); } static void xormqr(char * side, char * trans, DAAL_INT * m, DAAL_INT * n, DAAL_INT * k, float * a, DAAL_INT * lda, float * tau, float * c, DAAL_INT * ldc, float * work, DAAL_INT * lwork, DAAL_INT * info) { - __DAAL_MKLFN_CALL(lapack_, sormqr, (side, trans, m, n, k, a, lda, tau, c, ldc, work, lwork, info, 1, 1)); + __DAAL_MKLFN_CALL_LAPACK(sormqr, (side, trans, (MKL_INT *)m, (MKL_INT *)n, (MKL_INT *)k, a, (MKL_INT *)lda, tau, c, (MKL_INT *)ldc, work, + (MKL_INT *)lwork, (MKL_INT *)info)); } static void xxormqr(char * side, char * trans, DAAL_INT * m, DAAL_INT * n, DAAL_INT * k, float * a, DAAL_INT * lda, float * tau, float * c, DAAL_INT * ldc, float * work, DAAL_INT * lwork, DAAL_INT * info) { - int old_threads = fpk_serv_set_num_threads_local(1); - __DAAL_MKLFN_CALL(lapack_, sormqr, (side, trans, m, n, k, a, lda, tau, c, ldc, work, lwork, info, 1, 1)); - fpk_serv_set_num_threads_local(old_threads); + int old_nthr = mkl_set_num_threads_local(1); + __DAAL_MKLFN_CALL_LAPACK(sormqr, (side, trans, (MKL_INT *)m, (MKL_INT *)n, (MKL_INT *)k, a, (MKL_INT *)lda, tau, c, (MKL_INT *)ldc, work, + (MKL_INT *)lwork, (MKL_INT *)info)); + mkl_set_num_threads_local(old_nthr); } }; diff --git a/cpp/daal/src/externals/service_math_mkl.h b/cpp/daal/src/externals/service_math_mkl.h index a8bde41720f..fa5ce46a5ea 100644 
--- a/cpp/daal/src/externals/service_math_mkl.h +++ b/cpp/daal/src/externals/service_math_mkl.h @@ -25,46 +25,12 @@ #define __SERVICE_MATH_MKL_H__ #include -#include "vmlvsl.h" +#include #include "src/services/service_defines.h" -#if !defined(__DAAL_CONCAT5) - #define __DAAL_CONCAT5(a, b, c, d, e) __DAAL_CONCAT51(a, b, c, d, e) - #define __DAAL_CONCAT51(a, b, c, d, e) a##b##c##d##e -#endif - -#define VMLFN(f_cpu, f_name, f_suff) __DAAL_CONCAT5(fpk_vml_, f_name, _, f_cpu, f_suff) -#define VMLFN_CALL(f_name, f_suff, f_args) VMLFN_CALL1(f_name, f_suff, f_args) - -#if defined(__APPLE__) - #define __DAAL_MKLVML_SSE2 E9 - #define __DAAL_MKLVML_SSE42 E9 -#else - #define __DAAL_MKLVML_SSE2 EX - #define __DAAL_MKLVML_SSE42 H8 -#endif - -#define VMLFN_CALL1(f_name, f_suff, f_args) \ - if (avx512 == cpu) \ - { \ - VMLFN(Z0, f_name, f_suff) f_args; \ - return; \ - } \ - if (avx2 == cpu) \ - { \ - VMLFN(L9, f_name, f_suff) f_args; \ - return; \ - } \ - if (sse42 == cpu) \ - { \ - VMLFN(__DAAL_MKLVML_SSE42, f_name, f_suff) f_args; \ - return; \ - } \ - if (sse2 == cpu) \ - { \ - VMLFN(__DAAL_MKLVML_SSE2, f_name, f_suff) f_args; \ - return; \ - } +#define __DAAL_MKLFN_CALL_MATH(f_name, f_args) \ + f_name f_args; \ + return; namespace daal { @@ -135,27 +101,57 @@ struct MklMath return r; } - static void vPowx(SizeType n, const double * in, double in1, double * out) { VMLFN_CALL(dPowx, HAynn, ((int)n, in, in1, out)); } + static void vPowx(SizeType n, const double * in, double in1, double * out) + { + __DAAL_MKLFN_CALL_MATH(vmdPowx, ((int)n, in, in1, out, (VML_HA | VML_FTZDAZ_ON | VML_ERRMODE_IGNORE))); + } - static void vCeil(SizeType n, const double * in, double * out) { VMLFN_CALL(dCeil, HAynn, ((int)n, in, out)); } + static void vCeil(SizeType n, const double * in, double * out) + { + __DAAL_MKLFN_CALL_MATH(vmdCeil, ((int)n, in, out, (VML_HA | VML_FTZDAZ_ON | VML_ERRMODE_IGNORE))); + } - static void vErfInv(SizeType n, const double * in, double * out) { VMLFN_CALL(dErfInv, 
HAynn, ((int)n, in, out)); } + static void vErfInv(SizeType n, const double * in, double * out) + { + __DAAL_MKLFN_CALL_MATH(vmdErfInv, ((int)n, in, out, (VML_HA | VML_FTZDAZ_ON | VML_ERRMODE_IGNORE))); + } - static void vErf(SizeType n, const double * in, double * out) { VMLFN_CALL(dErf, HAynn, ((int)n, in, out)); } + static void vErf(SizeType n, const double * in, double * out) + { + __DAAL_MKLFN_CALL_MATH(vmdErf, ((int)n, in, out, (VML_HA | VML_FTZDAZ_ON | VML_ERRMODE_IGNORE))); + } - static void vExp(SizeType n, const double * in, double * out) { VMLFN_CALL(dExp, HAynn, ((int)n, in, out)); } + static void vExp(SizeType n, const double * in, double * out) + { + __DAAL_MKLFN_CALL_MATH(vmdExp, ((int)n, in, out, (VML_HA | VML_FTZDAZ_ON | VML_ERRMODE_IGNORE))); + } static double vExpThreshold() { return -650.0; } - static void vTanh(SizeType n, const double * in, double * out) { VMLFN_CALL(dTanh, HAynn, ((int)n, in, out)); } + static void vTanh(SizeType n, const double * in, double * out) + { + __DAAL_MKLFN_CALL_MATH(vmdTanh, ((int)n, in, out, (VML_HA | VML_FTZDAZ_ON | VML_ERRMODE_IGNORE))); + } - static void vSqrt(SizeType n, const double * in, double * out) { VMLFN_CALL(dSqrt, HAynn, ((int)n, in, out)); } + static void vSqrt(SizeType n, const double * in, double * out) + { + __DAAL_MKLFN_CALL_MATH(vmdSqrt, ((int)n, in, out, (VML_HA | VML_FTZDAZ_ON | VML_ERRMODE_IGNORE))); + } - static void vLog(SizeType n, const double * in, double * out) { VMLFN_CALL(dLn, HAynn, ((int)n, in, out)); } + static void vLog(SizeType n, const double * in, double * out) + { + __DAAL_MKLFN_CALL_MATH(vmdLn, ((int)n, in, out, (VML_HA | VML_FTZDAZ_ON | VML_ERRMODE_IGNORE))); + } - static void vLog1p(SizeType n, const double * in, double * out) { VMLFN_CALL(dLog1p, HAynn, ((int)n, in, out)); } + static void vLog1p(SizeType n, const double * in, double * out) + { + __DAAL_MKLFN_CALL_MATH(vmdLog1p, ((int)n, in, out, (VML_HA | VML_FTZDAZ_ON | VML_ERRMODE_IGNORE))); + } - static void 
vCdfNormInv(SizeType n, const double * in, double * out) { VMLFN_CALL(dCdfNormInv, HAynn, ((int)n, in, out)); } + static void vCdfNormInv(SizeType n, const double * in, double * out) + { + __DAAL_MKLFN_CALL_MATH(vmdCdfNormInv, ((int)n, in, out, (VML_HA | VML_FTZDAZ_ON | VML_ERRMODE_IGNORE))); + } }; /* @@ -217,27 +213,57 @@ struct MklMath return r; } - static void vPowx(SizeType n, const float * in, float in1, float * out) { VMLFN_CALL(sPowx, HAynn, ((int)n, in, in1, out)); } + static void vPowx(SizeType n, const float * in, float in1, float * out) + { + __DAAL_MKLFN_CALL_MATH(vmsPowx, ((int)n, in, in1, out, (VML_HA | VML_FTZDAZ_ON | VML_ERRMODE_IGNORE))); + } - static void vCeil(SizeType n, const float * in, float * out) { VMLFN_CALL(sCeil, HAynn, ((int)n, in, out)); } + static void vCeil(SizeType n, const float * in, float * out) + { + __DAAL_MKLFN_CALL_MATH(vmsCeil, ((int)n, in, out, (VML_HA | VML_FTZDAZ_ON | VML_ERRMODE_IGNORE))); + } - static void vErfInv(SizeType n, const float * in, float * out) { VMLFN_CALL(sErfInv, HAynn, ((int)n, in, out)); } + static void vErfInv(SizeType n, const float * in, float * out) + { + __DAAL_MKLFN_CALL_MATH(vmsErfInv, ((int)n, in, out, (VML_HA | VML_FTZDAZ_ON | VML_ERRMODE_IGNORE))); + } - static void vErf(SizeType n, const float * in, float * out) { VMLFN_CALL(sErf, HAynn, ((int)n, in, out)); } + static void vErf(SizeType n, const float * in, float * out) + { + __DAAL_MKLFN_CALL_MATH(vmsErf, ((int)n, in, out, (VML_HA | VML_FTZDAZ_ON | VML_ERRMODE_IGNORE))); + } - static void vExp(SizeType n, const float * in, float * out) { VMLFN_CALL(sExp, HAynn, ((int)n, in, out)); } + static void vExp(SizeType n, const float * in, float * out) + { + __DAAL_MKLFN_CALL_MATH(vmsExp, ((int)n, in, out, (VML_HA | VML_FTZDAZ_ON | VML_ERRMODE_IGNORE))); + } static float vExpThreshold() { return -75.0f; } - static void vTanh(SizeType n, const float * in, float * out) { VMLFN_CALL(sTanh, HAynn, ((int)n, in, out)); } + static void vTanh(SizeType n, 
const float * in, float * out) + { + __DAAL_MKLFN_CALL_MATH(vmsTanh, ((int)n, in, out, (VML_HA | VML_FTZDAZ_ON | VML_ERRMODE_IGNORE))); + } - static void vSqrt(SizeType n, const float * in, float * out) { VMLFN_CALL(sSqrt, HAynn, ((int)n, in, out)); } + static void vSqrt(SizeType n, const float * in, float * out) + { + __DAAL_MKLFN_CALL_MATH(vmsSqrt, ((int)n, in, out, (VML_HA | VML_FTZDAZ_ON | VML_ERRMODE_IGNORE))); + } - static void vLog(SizeType n, const float * in, float * out) { VMLFN_CALL(sLn, HAynn, ((int)n, in, out)); } + static void vLog(SizeType n, const float * in, float * out) + { + __DAAL_MKLFN_CALL_MATH(vmsLn, ((int)n, in, out, (VML_HA | VML_FTZDAZ_ON | VML_ERRMODE_IGNORE))); + } - static void vLog1p(SizeType n, const float * in, float * out) { VMLFN_CALL(sLog1p, HAynn, ((int)n, in, out)); } + static void vLog1p(SizeType n, const float * in, float * out) + { + __DAAL_MKLFN_CALL_MATH(vmsLog1p, ((int)n, in, out, (VML_HA | VML_FTZDAZ_ON | VML_ERRMODE_IGNORE))); + } - static void vCdfNormInv(SizeType n, const float * in, float * out) { VMLFN_CALL(sCdfNormInv, HAynn, ((int)n, in, out)); } + static void vCdfNormInv(SizeType n, const float * in, float * out) + { + __DAAL_MKLFN_CALL_MATH(vmsCdfNormInv, ((int)n, in, out, (VML_HA | VML_FTZDAZ_ON | VML_ERRMODE_IGNORE))); + } }; } // namespace mkl diff --git a/cpp/daal/src/externals/service_rng_mkl.h b/cpp/daal/src/externals/service_rng_mkl.h index f8ddfba394d..b2dcd81b78b 100644 --- a/cpp/daal/src/externals/service_rng_mkl.h +++ b/cpp/daal/src/externals/service_rng_mkl.h @@ -24,7 +24,7 @@ #ifndef __SERVICE_RNG_MKL_H__ #define __SERVICE_RNG_MKL_H__ -#include "vmlvsl.h" +#include #include "src/externals/service_stat_rng_mkl.h" #include "src/externals/service_rng_common.h" @@ -67,7 +67,7 @@ int uniformRNG(const size_t cn, size_t * r, void * stream, const size_t a, const int nb = len / 2; int nn = (int)n; int * rr = (int *)r; - __DAAL_VSLFN_CALL_NR_WHILE(fpk_vsl_kernel, iRngUniform, (method, stream, nn, rr, na, nb), 
errcode); + __DAAL_VSLFN_CALL_NR_WHILE(iRngUniform, ((const MKL_INT)method, stream, (const MKL_INT)nn, rr, na, nb), errcode); if (errcode != 0) { @@ -90,7 +90,7 @@ int uniformRNG(const size_t cn, size_t * r, void * stream, const size_t a, const int nb = len / 2; int nn = (int)n; int * rr = (int *)r + n; - __DAAL_VSLFN_CALL_NR_WHILE(fpk_vsl_kernel, iRngUniform, (method, stream, nn, rr, na, nb), errcode); + __DAAL_VSLFN_CALL_NR_WHILE(iRngUniform, ((const MKL_INT)method, stream, (const MKL_INT)nn, rr, na, nb), errcode); if (errcode != 0) { @@ -120,7 +120,7 @@ int uniformRNG(const size_t cn, size_t * r, void * stream, const size_t a, const for (int i = 0; i < 64; i++) dv /= 2.0; int nn = (int)n; unsigned __int64 * rr = cr; - __DAAL_VSLFN_CALL_NR_WHILE(fpk_vsl_kernel, iRngUniformBits64, (method, stream, nn, rr), errcode); + __DAAL_VSLFN_CALL_NR_WHILE(iRngUniformBits64, ((const MKL_INT)method, stream, (const MKL_INT)nn, (unsigned MKL_INT64 *)rr), errcode); if (errcode != 0) { @@ -136,7 +136,8 @@ int uniformRNG(const size_t cn, size_t * r, void * stream, const size_t a, const n = cn - pos; int nn = (int)n; unsigned __int64 * rr = cr + pos; - __DAAL_VSLFN_CALL_NR_WHILE(fpk_vsl_kernel, iRngUniformBits64, (method, stream, nn, rr), errcode); + __DAAL_VSLFN_CALL_NR_WHILE(iRngUniformBits64, ((const MKL_INT)method, stream, (const MKL_INT)nn, (unsigned MKL_INT64 *)rr), + errcode); if (errcode != 0) { @@ -169,7 +170,7 @@ int uniformRNG(const size_t n, int * r, void * stream, const int a, const int b, int errcode = 0; int nn = (int)n; int * rr = r; - __DAAL_VSLFN_CALL_NR_WHILE(fpk_vsl_kernel, iRngUniform, (method, stream, nn, rr, a, b), errcode); + __DAAL_VSLFN_CALL_NR_WHILE(iRngUniform, ((const MKL_INT)method, stream, (const MKL_INT)nn, rr, a, b), errcode); return errcode; } @@ -179,7 +180,7 @@ int uniformRNG(const size_t n, float * r, void * stream, const float a, const fl int errcode = 0; int nn = (int)n; float * rr = r; - __DAAL_VSLFN_CALL_NR_WHILE(fpk_vsl_kernel, sRngUniform, 
(method, stream, nn, rr, a, b), errcode); + __DAAL_VSLFN_CALL_NR_WHILE(sRngUniform, ((const MKL_INT)method, stream, (const MKL_INT)nn, rr, a, b), errcode); return errcode; } @@ -189,7 +190,7 @@ int uniformRNG(const size_t n, double * r, void * stream, const double a, const int errcode = 0; int nn = (int)n; double * rr = r; - __DAAL_VSLFN_CALL_NR_WHILE(fpk_vsl_kernel, dRngUniform, (method, stream, nn, rr, a, b), errcode); + __DAAL_VSLFN_CALL_NR_WHILE(dRngUniform, ((const MKL_INT)method, stream, (const MKL_INT)nn, rr, a, b), errcode); return errcode; } @@ -199,7 +200,7 @@ int uniformBits32RNG(const size_t n, unsigned int * r, void * stream, const int int errcode = 0; int nn = (int)n; unsigned int * rr = r; - __DAAL_VSLFN_CALL_NR_WHILE(fpk_vsl_kernel, iRngUniformBits32, (method, stream, nn, rr), errcode); + __DAAL_VSLFN_CALL_NR_WHILE(iRngUniformBits32, ((const MKL_INT)method, stream, (const MKL_INT)nn, rr), errcode); return errcode; } @@ -213,7 +214,7 @@ int gaussianRNG(const size_t n, float * r, void * stream, const float a, const f int errcode = 0; int nn = (int)n; float * rr = r; - __DAAL_VSLFN_CALL_NR_WHILE(fpk_vsl_kernel, sRngGaussian, (method, stream, nn, rr, a, sigma), errcode); + __DAAL_VSLFN_CALL_NR_WHILE(sRngGaussian, ((const MKL_INT)method, stream, (const MKL_INT)nn, rr, a, sigma), errcode); return errcode; } @@ -223,7 +224,7 @@ int gaussianRNG(const size_t n, double * r, void * stream, const double a, const int errcode = 0; int nn = (int)n; double * rr = r; - __DAAL_VSLFN_CALL_NR_WHILE(fpk_vsl_kernel, dRngGaussian, (method, stream, nn, rr, a, sigma), errcode); + __DAAL_VSLFN_CALL_NR_WHILE(dRngGaussian, ((const MKL_INT)method, stream, (const MKL_INT)nn, rr, a, sigma), errcode); return errcode; } @@ -237,7 +238,7 @@ int bernoulliRNG(const size_t n, int * r, void * stream, const double p, const i int errcode = 0; int nn = (int)n; int * rr = r; - __DAAL_VSLFN_CALL_NR_WHILE(fpk_vsl_kernel, iRngBernoulli, (method, stream, nn, rr, p), errcode); + 
__DAAL_VSLFN_CALL_NR_WHILE(iRngBernoulli, ((const MKL_INT)method, stream, (const MKL_INT)nn, rr, p), errcode); return errcode; } @@ -252,7 +253,7 @@ class BaseRNG : public BaseRNGIface { _seed[0] = seed; int errcode = 0; - __DAAL_VSLFN_CALL_NR(fpk_vsl_sub_kernel, vslNewStreamEx, (&_stream, brngId, 1, &seed), errcode); + __DAAL_VSLFN_CALL_NR(vslNewStreamEx, (&_stream, (const MKL_INT)brngId, (const MKL_INT)1, &seed), errcode); } } @@ -270,7 +271,7 @@ class BaseRNG : public BaseRNGIface } } int errcode = 0; - __DAAL_VSLFN_CALL_NR(fpk_vsl_sub_kernel, vslNewStreamEx, (&_stream, brngId, n, seed), errcode); + __DAAL_VSLFN_CALL_NR(vslNewStreamEx, (&_stream, (const MKL_INT)brngId, (const MKL_INT)n, seed), errcode); } } @@ -284,8 +285,8 @@ class BaseRNG : public BaseRNGIface _seed[i] = other._seed[i]; } int errcode = 0; - __DAAL_VSLFN_CALL_NR(fpk_vsl_sub_kernel, vslNewStreamEx, (&_stream, _brngId, _seedSize, _seed), errcode); - if (!errcode) __DAAL_VSLFN_CALL_NR(fpk_vsl_sub_kernel, vslCopyStreamState, (_stream, other._stream), errcode); + __DAAL_VSLFN_CALL_NR(vslNewStreamEx, (&_stream, (const MKL_INT)_brngId, (const MKL_INT)_seedSize, _seed), errcode); + if (!errcode) __DAAL_VSLFN_CALL_NR(vslCopyStreamState, (_stream, other._stream), errcode); } } @@ -293,42 +294,42 @@ class BaseRNG : public BaseRNGIface { daal::services::daal_free((void *)_seed); int errcode = 0; - __DAAL_VSLFN_CALL_NR(fpk_vsl_sub_kernel, vslDeleteStream, (&_stream), errcode); + __DAAL_VSLFN_CALL_NR(vslDeleteStream, (&_stream), errcode); } int getStateSize() const { int res = 0; - __DAAL_VSLFN_CALL_NR(fpk_vsl_sub_kernel, vslGetStreamSize, (_stream), res); + __DAAL_VSLFN_CALL_NR(vslGetStreamSize, (_stream), res); return res; } int saveState(void * dest) const { int errcode = 0; - __DAAL_VSLFN_CALL_NR(fpk_vsl_sub_kernel, vslSaveStreamM, (_stream, (char *)dest), errcode); + __DAAL_VSLFN_CALL_NR(vslSaveStreamM, (_stream, (char *)dest), errcode); return errcode; } int loadState(const void * src) { int errcode = 
0; - __DAAL_VSLFN_CALL_NR(fpk_vsl_sub_kernel, vslDeleteStream, (&_stream), errcode); - if (!errcode) __DAAL_VSLFN_CALL_NR(fpk_vsl_sub_kernel, vslLoadStreamM, (&_stream, (const char *)src), errcode); + __DAAL_VSLFN_CALL_NR(vslDeleteStream, (&_stream), errcode); + if (!errcode) __DAAL_VSLFN_CALL_NR(vslLoadStreamM, (&_stream, (const char *)src), errcode); return errcode; } int leapfrog(size_t threadNum, size_t nThreads) { int errcode = 0; - __DAAL_VSLFN_CALL_NR(fpk_vsl_sub_kernel, vslLeapfrogStream, (_stream, threadNum, nThreads), errcode); + __DAAL_VSLFN_CALL_NR(vslLeapfrogStream, (_stream, (const MKL_INT)threadNum, (const MKL_INT)nThreads), errcode); return errcode; } int skipAhead(size_t nSkip) { int errcode = 0; - __DAAL_VSLFN_CALL_NR(fpk_vsl_sub_kernel, vslSkipAheadStream, (_stream, nSkip), errcode); + __DAAL_VSLFN_CALL_NR(vslSkipAheadStream, (_stream, nSkip), errcode); return errcode; } diff --git a/cpp/daal/src/externals/service_service_mkl.h b/cpp/daal/src/externals/service_service_mkl.h index 335fe2a5c21..50ebd34dab5 100644 --- a/cpp/daal/src/externals/service_service_mkl.h +++ b/cpp/daal/src/externals/service_service_mkl.h @@ -25,9 +25,10 @@ #define __SERVICE_SERVICE_MKL_H__ #include "services/daal_defines.h" -#include "mkl_daal.h" -#include "istrconv_daal.h" -#include "istrconv_daal_el.h" +#include "src/services/service_topo.h" +#include +#include +#include namespace daal { @@ -37,49 +38,103 @@ namespace mkl { struct MklService { - static void * serv_malloc(size_t size, size_t alignment) { return fpk_serv_malloc(size, alignment); } + static void * serv_malloc(size_t size, size_t alignment) { return MKL_malloc(size, alignment); } - static void serv_free(void * ptr) { fpk_serv_free(ptr); } + static void serv_free(void * ptr) { MKL_free(ptr); } - static void serv_free_buffers() { fpk_serv_free_buffers(); } + static void serv_free_buffers() { MKL_Free_Buffers(); } static int serv_memcpy_s(void * dest, size_t destSize, const void * src, size_t srcSize) { - 
return fpk_serv_memcpy_s(dest, destSize, src, srcSize); + if (destSize < srcSize) return static_cast(ENOMEM); + memcpy(dest, src, srcSize); + return 0; + // TODO: safe funtion + // return memcpy_s(dest, destSize, src, srcSize); } - static int serv_memmove_s(void * dest, size_t destSize, const void * src, size_t smax) { return fpk_serv_memmove_s(dest, destSize, src, smax); } - - static int serv_get_ht() { return fpk_serv_get_ht(); } + static int serv_memmove_s(void * dest, size_t destSize, const void * src, size_t smax) + { + if (destSize < smax) return static_cast(ENOMEM); + memmove(dest, src, smax); + return 0; + // TODO: safe funtion + // return memmove_s(dest, destSize, src, smax); + } - static int serv_get_ncpus() { return fpk_serv_get_ncpus(); } + static int serv_get_ht() { return (serv_get_ncorespercpu() > 1 ? 1 : 0); } - static int serv_get_ncorespercpu() { return fpk_serv_get_ncorespercpu(); } + static int serv_get_ncpus() + { + unsigned int ncores = daal::services::internal::_internal_daal_GetProcessorCoreCount(); + return (ncores ? ncores : 1); + } - static int serv_set_memory_limit(int type, size_t limit) { return fpk_serv_set_memory_limit(type, limit); } + static int serv_get_ncorespercpu() + { + unsigned int nlogicalcpu = daal::services::internal::_internal_daal_GetProcessorCoreCount(); + unsigned int ncpus = serv_get_ncpus(); + return (ncpus > 0 && nlogicalcpu > 0 && nlogicalcpu > ncpus ? 
nlogicalcpu / ncpus : 1); + } + // TODO: The real call should be delegated to a backend library if the option is supported + static int serv_set_memory_limit(int type, size_t limit) { return MKL_Set_Memory_Limit(type, limit); } // Added for interface compatibility - not expected to be called - static size_t serv_strnlen_s(const char * src, size_t slen) + static size_t serv_strnlen_s(const char * src, size_t slen) { return strnlen(src, slen); } + + static int serv_strncpy_s(char * dest, size_t dmax, const char * src, size_t slen) { - size_t i = 0; - for (; i < slen && src[i] != '\0'; ++i) - ; - return i; + if (dmax < slen) return static_cast(ENOMEM); + strncpy(dest, src, slen); + return 0; } - static int serv_strncpy_s(char * dest, size_t dmax, const char * src, size_t slen) { return fpk_serv_strncpy_s(dest, dmax, src, slen); } - - static int serv_strncat_s(char * dest, size_t dmax, const char * src, size_t slen) { return fpk_serv_strncat_s(dest, dmax, src, slen); } + static int serv_strncat_s(char * dest, size_t dmax, const char * src, size_t slen) + { + if (dmax < slen) return static_cast(ENOMEM); + strncat(dest, src, slen); + return 0; + } - static float serv_string_to_float(const char * nptr, char ** endptr) { return __FPK_string_to_float(nptr, endptr); } + // TODO: not a safe function - no control for the input buffer end + static double serv_string_to_double(const char * nptr, char ** endptr) + { + const char * cur = nptr; + for (; isdigit(*cur) || *cur == '-' || *cur == 'e' || *cur == 'E' || *cur == '.'; ++cur) + ; + if (endptr) *endptr = const_cast(cur); + size_t size = cur - nptr; + // TODO replace with static buffer + char * buffer = static_cast(malloc(size + 1)); + for (size_t i = 0; i < size; ++i) buffer[i] = nptr[i]; + buffer[size] = '\0'; + double val = atof(buffer); + free(buffer); + return val; + } - static double serv_string_to_double(const char * nptr, char ** endptr) { return __FPK_string_to_double(nptr, endptr); } + static float 
serv_string_to_float(const char * nptr, char ** endptr) { return static_cast(serv_string_to_double(nptr, endptr)); } - static int serv_string_to_int(const char * nptr, char ** endptr) { return __FPK_string_to_int_generic(nptr, endptr); } + // TODO: not a safe function - no control for the input buffer end + static int serv_string_to_int(const char * nptr, char ** endptr) + { + const char * cur = nptr; + for (; isdigit(*cur) || *cur == '-'; ++cur) + ; + if (endptr) *endptr = const_cast(cur); + size_t size = cur - nptr; + // TODO replace with static buffer + char * buffer = static_cast(malloc(size + 1)); + for (size_t i = 0; i < size; ++i) buffer[i] = nptr[i]; + buffer[size] = '\0'; + int val = atoi(buffer); + free(buffer); + return val; + } - static int serv_int_to_string(char * buffer, size_t n, int value) { return __FPK_int_to_string(buffer, n, value); } + static int serv_int_to_string(char * buffer, size_t n, int value) { return snprintf(buffer, n, "%d", value); } - static int serv_double_to_string(char * buffer, size_t n, double value) { return __FPK_double_to_string_f(buffer, n, value); } + static int serv_double_to_string(char * buffer, size_t n, double value) { return snprintf(buffer, n, "%E", value); } }; } // namespace mkl diff --git a/cpp/daal/src/externals/service_service_ref.h b/cpp/daal/src/externals/service_service_ref.h index da67ef66e0f..a76884a5039 100644 --- a/cpp/daal/src/externals/service_service_ref.h +++ b/cpp/daal/src/externals/service_service_ref.h @@ -93,7 +93,7 @@ struct RefService { return 0; // Old one - just to see what the method is for - // return fpk_serv_set_memory_limit(type, limit); + // return mkl_serv_set_memory_limit(type, limit); } static size_t serv_strnlen_s(const char * src, size_t slen) diff --git a/cpp/daal/src/externals/service_spblas_mkl.h b/cpp/daal/src/externals/service_spblas_mkl.h index 6e2ca981572..9cf80a5b6aa 100644 --- a/cpp/daal/src/externals/service_spblas_mkl.h +++ b/cpp/daal/src/externals/service_spblas_mkl.h 
@@ -25,65 +25,7 @@ #define __SERVICE_SPBLAS_MKL_H__ #include "services/daal_defines.h" -#include "mkl_daal.h" - -#if !defined(__DAAL_CONCAT4) - #define __DAAL_CONCAT4(a, b, c, d) __DAAL_CONCAT41(a, b, c, d) - #define __DAAL_CONCAT41(a, b, c, d) a##b##c##d -#endif - -#if !defined(__DAAL_CONCAT5) - #define __DAAL_CONCAT5(a, b, c, d, e) __DAAL_CONCAT51(a, b, c, d, e) - #define __DAAL_CONCAT51(a, b, c, d, e) a##b##c##d##e -#endif - -#if defined(__APPLE__) - #define __DAAL_MKL_SSE2 avx_ - #define __DAAL_MKL_SSE42 avx_ -#else - #define __DAAL_MKL_SSE2 sse2_ - #define __DAAL_MKL_SSE42 sse42_ -#endif - -#define __DAAL_MKLFN(f_cpu, f_pref, f_name) __DAAL_CONCAT4(fpk_, f_pref, f_cpu, f_name) -#define __DAAL_MKLFN_CALL(f_pref, f_name, f_args) __DAAL_MKLFN_CALL1(f_pref, f_name, f_args) -#define __DAAL_MKLFN_CALL_RETURN(f_pref, f_name, f_args) __DAAL_MKLFN_CALL2(f_pref, f_name, f_args) - -#define __DAAL_MKLFN_CALL1(f_pref, f_name, f_args) \ - if (avx512 == cpu) \ - { \ - __DAAL_MKLFN(avx512_, f_pref, f_name) f_args; \ - } \ - if (avx2 == cpu) \ - { \ - __DAAL_MKLFN(avx2_, f_pref, f_name) f_args; \ - } \ - if (sse42 == cpu) \ - { \ - __DAAL_MKLFN(__DAAL_MKL_SSE42, f_pref, f_name) f_args; \ - } \ - if (sse2 == cpu) \ - { \ - __DAAL_MKLFN(__DAAL_MKL_SSE2, f_pref, f_name) f_args; \ - } - -#define __DAAL_MKLFN_CALL2(f_pref, f_name, f_args) \ - if (avx512 == cpu) \ - { \ - return __DAAL_MKLFN(avx512_, f_pref, f_name) f_args; \ - } \ - if (avx2 == cpu) \ - { \ - return __DAAL_MKLFN(avx2_, f_pref, f_name) f_args; \ - } \ - if (sse42 == cpu) \ - { \ - return __DAAL_MKLFN(__DAAL_MKL_SSE42, f_pref, f_name) f_args; \ - } \ - if (sse2 == cpu) \ - { \ - return __DAAL_MKLFN(__DAAL_MKL_SSE2, f_pref, f_name) f_args; \ - } +#include namespace daal { @@ -107,29 +49,96 @@ struct MklSpBlas static void xcsrmultd(const char * transa, const DAAL_INT * m, const DAAL_INT * n, const DAAL_INT * k, double * a, DAAL_INT * ja, DAAL_INT * ia, double * b, DAAL_INT * jb, DAAL_INT * ib, double * c, DAAL_INT * 
ldc) { - __DAAL_MKLFN_CALL(spblas_, mkl_dcsrmultd, (transa, m, n, k, a, ja, ia, b, jb, ib, c, ldc)); + sparse_matrix_t csrA = NULL; + struct matrix_descr descrA; + descrA.type = SPARSE_MATRIX_TYPE_GENERAL; + mkl_sparse_d_create_csr(&csrA, SPARSE_INDEX_BASE_ONE, (const MKL_INT)*m, (const MKL_INT)*n, (MKL_INT *)ia, (MKL_INT *)ia + 1, (MKL_INT *)ja, + a); + + sparse_matrix_t csrB = NULL; + struct matrix_descr descrB; + descrB.type = SPARSE_MATRIX_TYPE_GENERAL; + mkl_sparse_d_create_csr(&csrB, SPARSE_INDEX_BASE_ONE, (const MKL_INT)*m, (const MKL_INT)*k, (MKL_INT *)ib, (MKL_INT *)ib + 1, (MKL_INT *)jb, + b); + + if (*transa == 'n' || *transa == 'N') + { + mkl_sparse_d_spmmd(SPARSE_OPERATION_NON_TRANSPOSE, csrA, csrB, SPARSE_LAYOUT_COLUMN_MAJOR, c, (const MKL_INT)*ldc); + } + else + { + mkl_sparse_d_spmmd(SPARSE_OPERATION_TRANSPOSE, csrA, csrB, SPARSE_LAYOUT_COLUMN_MAJOR, c, (const MKL_INT)*ldc); + } + mkl_sparse_destroy(csrA); + mkl_sparse_destroy(csrB); } static void xcsrmv(const char * transa, const DAAL_INT * m, const DAAL_INT * k, const double * alpha, const char * matdescra, const double * val, const DAAL_INT * indx, const DAAL_INT * pntrb, const DAAL_INT * pntre, const double * x, const double * beta, double * y) { - __DAAL_MKLFN_CALL(spblas_, mkl_dcsrmv, (transa, m, k, alpha, matdescra, val, indx, pntrb, pntre, x, beta, y)); + sparse_matrix_t csrA = NULL; + struct matrix_descr descrA; + descrA.type = SPARSE_MATRIX_TYPE_GENERAL; + mkl_sparse_d_create_csr(&csrA, SPARSE_INDEX_BASE_ONE, (const MKL_INT)*m, (const MKL_INT)*k, (MKL_INT *)pntre, (MKL_INT *)pntrb, + (MKL_INT *)indx, (double *)val); + if (*transa == 'n' || *transa == 'N') + { + mkl_sparse_d_mv(SPARSE_OPERATION_NON_TRANSPOSE, *alpha, csrA, descrA, x, *beta, y); + } + else + { + mkl_sparse_d_mv(SPARSE_OPERATION_TRANSPOSE, *alpha, csrA, descrA, x, *beta, y); + } + mkl_sparse_destroy(csrA); } static void xcsrmm(const char * transa, const DAAL_INT * m, const DAAL_INT * n, const DAAL_INT * k, const double * 
alpha, const char * matdescra, const double * val, const DAAL_INT * indx, const DAAL_INT * pntrb, const double * b, const DAAL_INT * ldb, const double * beta, double * c, const DAAL_INT * ldc) { - __DAAL_MKLFN_CALL(spblas_, mkl_dcsrmm, (transa, m, n, k, alpha, matdescra, val, indx, pntrb, pntrb + 1, b, ldb, beta, c, ldc)); + sparse_matrix_t csrA = NULL; + struct matrix_descr descrA; + descrA.type = SPARSE_MATRIX_TYPE_GENERAL; + mkl_sparse_d_create_csr(&csrA, SPARSE_INDEX_BASE_ONE, (const MKL_INT)*m, (const MKL_INT)*k, (MKL_INT *)pntrb, (MKL_INT *)(pntrb + 1), + (MKL_INT *)indx, (double *)val); + + if (*transa == 'n' || *transa == 'N') + { + mkl_sparse_d_mm(SPARSE_OPERATION_NON_TRANSPOSE, *alpha, csrA, descrA, SPARSE_LAYOUT_COLUMN_MAJOR, b, (const MKL_INT)*n, + (const MKL_INT)*ldb, *beta, c, (const MKL_INT)*ldc); + } + else + { + mkl_sparse_d_mm(SPARSE_OPERATION_TRANSPOSE, *alpha, csrA, descrA, SPARSE_LAYOUT_COLUMN_MAJOR, b, (const MKL_INT)*n, (const MKL_INT)*ldb, + *beta, c, (const MKL_INT)*ldc); + } + mkl_sparse_destroy(csrA); } static void xxcsrmm(const char * transa, const DAAL_INT * m, const DAAL_INT * n, const DAAL_INT * k, const double * alpha, const char * matdescra, const double * val, const DAAL_INT * indx, const DAAL_INT * pntrb, const double * b, const DAAL_INT * ldb, const double * beta, double * c, const DAAL_INT * ldc) { - int old_threads = fpk_serv_set_num_threads_local(1); - __DAAL_MKLFN_CALL(spblas_, mkl_dcsrmm, (transa, m, n, k, alpha, matdescra, val, indx, pntrb, pntrb + 1, b, ldb, beta, c, ldc)); - fpk_serv_set_num_threads_local(old_threads); + int old_nthr = mkl_set_num_threads_local(1); + sparse_matrix_t csrA = NULL; + struct matrix_descr descrA; + descrA.type = SPARSE_MATRIX_TYPE_GENERAL; + mkl_sparse_d_create_csr(&csrA, SPARSE_INDEX_BASE_ONE, (const MKL_INT)*m, (const MKL_INT)*k, (MKL_INT *)pntrb, (MKL_INT *)(pntrb + 1), + (MKL_INT *)indx, (double *)val); + + if (*transa == 'n' || *transa == 'N') + { + 
mkl_sparse_d_mm(SPARSE_OPERATION_NON_TRANSPOSE, *alpha, csrA, descrA, SPARSE_LAYOUT_COLUMN_MAJOR, b, (const MKL_INT)*n, + (const MKL_INT)*ldb, *beta, c, (const MKL_INT)*ldc); + } + else + { + mkl_sparse_d_mm(SPARSE_OPERATION_TRANSPOSE, *alpha, csrA, descrA, SPARSE_LAYOUT_COLUMN_MAJOR, b, (const MKL_INT)*n, (const MKL_INT)*ldb, + *beta, c, (const MKL_INT)*ldc); + } + mkl_sparse_destroy(csrA); + + mkl_set_num_threads_local(old_nthr); } }; @@ -145,29 +154,97 @@ struct MklSpBlas static void xcsrmultd(const char * transa, const DAAL_INT * m, const DAAL_INT * n, const DAAL_INT * k, float * a, DAAL_INT * ja, DAAL_INT * ia, float * b, DAAL_INT * jb, DAAL_INT * ib, float * c, DAAL_INT * ldc) { - __DAAL_MKLFN_CALL(spblas_, mkl_scsrmultd, (transa, m, n, k, a, ja, ia, b, jb, ib, c, ldc)); + sparse_matrix_t csrA = NULL; + struct matrix_descr descrA; + descrA.type = SPARSE_MATRIX_TYPE_GENERAL; + mkl_sparse_s_create_csr(&csrA, SPARSE_INDEX_BASE_ONE, (const MKL_INT)*m, (const MKL_INT)*n, (MKL_INT *)ia, (MKL_INT *)ia + 1, (MKL_INT *)ja, + a); + + sparse_matrix_t csrB = NULL; + struct matrix_descr descrB; + descrB.type = SPARSE_MATRIX_TYPE_GENERAL; + mkl_sparse_s_create_csr(&csrB, SPARSE_INDEX_BASE_ONE, (const MKL_INT)*m, (const MKL_INT)*k, (MKL_INT *)ib, (MKL_INT *)ib + 1, (MKL_INT *)jb, + b); + + if (*transa == 'n' || *transa == 'N') + { + mkl_sparse_s_spmmd(SPARSE_OPERATION_NON_TRANSPOSE, csrA, csrB, SPARSE_LAYOUT_COLUMN_MAJOR, c, (const MKL_INT)*ldc); + } + else + { + mkl_sparse_s_spmmd(SPARSE_OPERATION_TRANSPOSE, csrA, csrB, SPARSE_LAYOUT_COLUMN_MAJOR, c, (const MKL_INT)*ldc); + } + mkl_sparse_destroy(csrA); + mkl_sparse_destroy(csrB); } static void xcsrmv(const char * transa, const DAAL_INT * m, const DAAL_INT * k, const float * alpha, const char * matdescra, const float * val, const DAAL_INT * indx, const DAAL_INT * pntrb, const DAAL_INT * pntre, const float * x, const float * beta, float * y) { - __DAAL_MKLFN_CALL(spblas_, mkl_scsrmv, (transa, m, k, alpha, matdescra, val, 
indx, pntrb, pntre, x, beta, y)); + sparse_matrix_t csrA = NULL; + struct matrix_descr descrA; + descrA.type = SPARSE_MATRIX_TYPE_GENERAL; + mkl_sparse_s_create_csr(&csrA, SPARSE_INDEX_BASE_ONE, (const MKL_INT)*m, (const MKL_INT)*k, (MKL_INT *)pntre, (MKL_INT *)pntrb, + (MKL_INT *)indx, (float *)val); + + if (*transa == 'n' || *transa == 'N') + { + mkl_sparse_s_mv(SPARSE_OPERATION_NON_TRANSPOSE, *alpha, csrA, descrA, x, *beta, y); + } + else + { + mkl_sparse_s_mv(SPARSE_OPERATION_TRANSPOSE, *alpha, csrA, descrA, x, *beta, y); + } + mkl_sparse_destroy(csrA); } static void xcsrmm(const char * transa, const DAAL_INT * m, const DAAL_INT * n, const DAAL_INT * k, const float * alpha, const char * matdescra, const float * val, const DAAL_INT * indx, const DAAL_INT * pntrb, const float * b, const DAAL_INT * ldb, const float * beta, float * c, const DAAL_INT * ldc) { - __DAAL_MKLFN_CALL(spblas_, mkl_scsrmm, (transa, m, n, k, alpha, matdescra, val, indx, pntrb, pntrb + 1, b, ldb, beta, c, ldc)); + sparse_matrix_t csrA = NULL; + struct matrix_descr descrA; + descrA.type = SPARSE_MATRIX_TYPE_GENERAL; + mkl_sparse_s_create_csr(&csrA, SPARSE_INDEX_BASE_ONE, (const MKL_INT)*m, (const MKL_INT)*k, (MKL_INT *)pntrb, (MKL_INT *)(pntrb + 1), + (MKL_INT *)indx, (float *)val); + + if (*transa == 'n' || *transa == 'N') + { + mkl_sparse_s_mm(SPARSE_OPERATION_NON_TRANSPOSE, *alpha, csrA, descrA, SPARSE_LAYOUT_COLUMN_MAJOR, b, (const MKL_INT)*n, + (const MKL_INT)*ldb, *beta, c, (const MKL_INT)*ldc); + } + else + { + mkl_sparse_s_mm(SPARSE_OPERATION_TRANSPOSE, *alpha, csrA, descrA, SPARSE_LAYOUT_COLUMN_MAJOR, b, (const MKL_INT)*n, (const MKL_INT)*ldb, + *beta, c, (const MKL_INT)*ldc); + } + mkl_sparse_destroy(csrA); } static void xxcsrmm(const char * transa, const DAAL_INT * m, const DAAL_INT * n, const DAAL_INT * k, const float * alpha, const char * matdescra, const float * val, const DAAL_INT * indx, const DAAL_INT * pntrb, const float * b, const DAAL_INT * ldb, const float * beta, float * 
c, const DAAL_INT * ldc) { - int old_threads = fpk_serv_set_num_threads_local(1); - __DAAL_MKLFN_CALL(spblas_, mkl_scsrmm, (transa, m, n, k, alpha, matdescra, val, indx, pntrb, pntrb + 1, b, ldb, beta, c, ldc)); - fpk_serv_set_num_threads_local(old_threads); + int old_nthr = mkl_set_num_threads_local(1); + sparse_matrix_t csrA = NULL; + struct matrix_descr descrA; + descrA.type = SPARSE_MATRIX_TYPE_GENERAL; + mkl_sparse_s_create_csr(&csrA, SPARSE_INDEX_BASE_ONE, (const MKL_INT)*m, (const MKL_INT)*k, (MKL_INT *)pntrb, (MKL_INT *)(pntrb + 1), + (MKL_INT *)indx, (float *)val); + + if (*transa == 'n' || *transa == 'N') + { + mkl_sparse_s_mm(SPARSE_OPERATION_NON_TRANSPOSE, *alpha, csrA, descrA, SPARSE_LAYOUT_COLUMN_MAJOR, b, (const MKL_INT)*n, + (const MKL_INT)*ldb, *beta, c, (const MKL_INT)*ldc); + } + else + { + mkl_sparse_s_mm(SPARSE_OPERATION_TRANSPOSE, *alpha, csrA, descrA, SPARSE_LAYOUT_COLUMN_MAJOR, b, (const MKL_INT)*n, (const MKL_INT)*ldb, + *beta, c, (const MKL_INT)*ldc); + } + mkl_sparse_destroy(csrA); + + mkl_set_num_threads_local(old_nthr); } }; diff --git a/cpp/daal/src/externals/service_stat_mkl.h b/cpp/daal/src/externals/service_stat_mkl.h index 05ab508589a..83160afd15a 100644 --- a/cpp/daal/src/externals/service_stat_mkl.h +++ b/cpp/daal/src/externals/service_stat_mkl.h @@ -24,10 +24,16 @@ #ifndef __SERVICE_STAT_MKL_H__ #define __SERVICE_STAT_MKL_H__ -#include "vmlvsl.h" +#include +#include #include "src/externals/service_memory.h" #include "src/externals/service_stat_rng_mkl.h" +typedef void (*func_type)(DAAL_INT, DAAL_INT, DAAL_INT, void *); + +#undef __DAAL_VSLFN_CALL +#define __DAAL_VSLFN_CALL(f_name, f_args, errcode) errcode = f_name f_args; + #if defined(_WIN64) || defined(__x86_64__) #define __SS_ILP_FLAG__ 1 #else @@ -77,81 +83,6 @@ extern "C" #define __DAAL_VSL_SS_ERROR_BAD_QUANT_ORDER VSL_SS_ERROR_BAD_QUANT_ORDER #define __DAAL_VSL_SS_ERROR_INDICES_NOT_SUPPORTED VSL_SS_ERROR_INDICES_NOT_SUPPORTED - - typedef void (*threadfuncfor)(DAAL_INT, 
DAAL_INT, void *, func_type); - typedef void (*threadfuncforordered)(DAAL_INT, DAAL_INT, void *, func_type); - typedef void (*threadfuncsection)(DAAL_INT, void *, func_type); - typedef void (*threadfuncordered)(DAAL_INT, DAAL_INT, DAAL_INT, void *, func_type); - typedef DAAL_INT (*threadgetlimit)(void); - - struct ThreadingFuncs - { - threadfuncfor funcfor; - threadfuncfor funcforordered; - threadfuncsection funcsection; - threadfuncordered funcordered; - threadgetlimit getlimit; - }; - - [[maybe_unused]] static void _daal_mkl_threader_for_sequential(DAAL_INT n, DAAL_INT threads_request, void * a, func_type func) - { - DAAL_INT i; - - for (i = 0; i < n; i++) - { - func(i, 0, 1, a); - } - } - - [[maybe_unused]] static void _daal_mkl_threader_for_ordered_sequential(DAAL_INT n, DAAL_INT threads_request, void * a, func_type func) - { - DAAL_INT i; - - for (i = 0; i < n; i++) - { - func(i, 0, 1, a); - } - } - - [[maybe_unused]] static void _daal_mkl_threader_sections_sequential(DAAL_INT threads_request, void * a, func_type func) - { - func(0, 0, 1, a); - } - - [[maybe_unused]] static void _daal_mkl_threader_ordered_sequential(DAAL_INT i, DAAL_INT th_idx, DAAL_INT th_num, void * a, func_type func) - { - func(i, th_idx, th_num, a); - } - - [[maybe_unused]] static DAAL_INT _daal_mkl_threader_get_max_threads_sequential() - { - return 1; - } - - [[maybe_unused]] static void _daal_mkl_threader_for(DAAL_INT n, DAAL_INT threads_request, void * a, func_type func) - { - fpk_vsl_serv_threader_for(n, threads_request, a, func); - } - - [[maybe_unused]] static void _daal_mkl_threader_for_ordered(DAAL_INT n, DAAL_INT threads_request, void * a, func_type func) - { - fpk_vsl_serv_threader_for_ordered(n, threads_request, a, func); - } - - [[maybe_unused]] static void _daal_mkl_threader_sections(DAAL_INT threads_request, void * a, func_type func) - { - fpk_vsl_serv_threader_sections(threads_request, a, func); - } - - [[maybe_unused]] static void _daal_mkl_threader_ordered(DAAL_INT i, 
DAAL_INT th_idx, DAAL_INT th_num, void * a, func_type func) - { - //not used. To be implemented if needed. - } - - [[maybe_unused]] static DAAL_INT _daal_mkl_threader_get_max_threads() - { - return fpk_vsl_serv_threader_get_num_threads_limit(); - } } namespace daal @@ -178,7 +109,7 @@ struct MklStatistics static int xcp(double * data, __int64 nFeatures, __int64 nVectors, double * nPreviousObservations, double * sum, double * crossProduct, __int64 method) { - DAAL_VSLSSTaskPtr task; + VSLSSTaskPtr task; int errcode = 0; __int64 dataStorage = __DAAL_VSL_SS_MATRIX_STORAGE_COLS; @@ -197,18 +128,15 @@ struct MklStatistics double weight[2] = { *nPreviousObservations, *nPreviousObservations }; - __DAAL_VSLFN_CALL(fpk_vsl_sub_kernel, vsldSSNewTask, (&task, &nFeatures, &nVectors, &dataStorage, data, 0, 0, __SS_ILP_FLAG__), errcode); - __DAAL_VSLFN_CALL(fpk_vsl_sub_kernel, vsldSSEditTask, (task, __DAAL_VSL_SS_ED_SUM, sum), errcode); - __DAAL_VSLFN_CALL(fpk_vsl_sub_kernel, vsldSSEditTask, (task, __DAAL_VSL_SS_ED_MEAN, mean), errcode); - __DAAL_VSLFN_CALL(fpk_vsl_sub_kernel, vsldSSEditTask, (task, __DAAL_VSL_SS_ED_CP, crossProduct), errcode); - __DAAL_VSLFN_CALL(fpk_vsl_sub_kernel, vsliSSEditTask, (task, __DAAL_VSL_SS_ED_CP_STORAGE, &cpStorage), errcode); - __DAAL_VSLFN_CALL(fpk_vsl_sub_kernel, vsldSSEditTask, (task, __DAAL_VSL_SS_ED_ACCUM_WEIGHT, weight), errcode); - - ThreadingFuncs threading = { _daal_mkl_threader_for, _daal_mkl_threader_for_ordered, _daal_mkl_threader_sections, _daal_mkl_threader_ordered, - _daal_mkl_threader_get_max_threads }; - - __DAAL_VSLFN_CALL(fpk_vsl_kernel, dSSBasic, (task, __DAAL_VSL_SS_CP | __DAAL_VSL_SS_SUM, method, &threading), errcode); - __DAAL_VSLFN_CALL(fpk_vsl_sub_kernel, vslSSDeleteTask, (&task), errcode); + __DAAL_VSLFN_CALL(vsldSSNewTask, (&task, (const MKL_INT *)&nFeatures, (const MKL_INT *)&nVectors, (const MKL_INT *)&dataStorage, data, 0, 0), + errcode); + __DAAL_VSLFN_CALL(vsldSSEditTask, (task, __DAAL_VSL_SS_ED_SUM, sum), errcode); 
+ __DAAL_VSLFN_CALL(vsldSSEditTask, (task, __DAAL_VSL_SS_ED_MEAN, mean), errcode); + __DAAL_VSLFN_CALL(vsldSSEditTask, (task, __DAAL_VSL_SS_ED_CP, crossProduct), errcode); + __DAAL_VSLFN_CALL(vsliSSEditTask, (task, __DAAL_VSL_SS_ED_CP_STORAGE, (const MKL_INT *)&cpStorage), errcode); + __DAAL_VSLFN_CALL(vsldSSEditTask, (task, __DAAL_VSL_SS_ED_ACCUM_WEIGHT, weight), errcode); + __DAAL_VSLFN_CALL(vsldSSCompute, (task, __DAAL_VSL_SS_CP | __DAAL_VSL_SS_SUM, method), errcode); + __DAAL_VSLFN_CALL(vslSSDeleteTask, (&task), errcode); daal::services::daal_free(mean); mean = NULL; @@ -218,7 +146,7 @@ struct MklStatistics static int xxcp_weight(double * data, __int64 nFeatures, __int64 nVectors, double * weight, double * accumWeight, double * mean, double * crossProduct, __int64 method) { - DAAL_VSLSSTaskPtr task; + VSLSSTaskPtr task; int errcode = 0; __int64 dataStorage = __DAAL_VSL_SS_MATRIX_STORAGE_COLS; @@ -228,20 +156,16 @@ struct MklStatistics double accumWeightsAll[2] = { 0, 0 }; - __DAAL_VSLFN_CALL(fpk_vsl_sub_kernel, vsldSSNewTask, (&task, &nFeatures, &nVectors, &dataStorage, data, 0, 0, __SS_ILP_FLAG__), errcode); - __DAAL_VSLFN_CALL(fpk_vsl_sub_kernel, vsldSSEditTask, (task, __DAAL_VSL_SS_ED_SUM, sum), errcode); - __DAAL_VSLFN_CALL(fpk_vsl_sub_kernel, vsldSSEditTask, (task, __DAAL_VSL_SS_ED_MEAN, mean), errcode); - __DAAL_VSLFN_CALL(fpk_vsl_sub_kernel, vsldSSEditTask, (task, __DAAL_VSL_SS_ED_WEIGHTS, weight), errcode); - __DAAL_VSLFN_CALL(fpk_vsl_sub_kernel, vsldSSEditTask, (task, __DAAL_VSL_SS_ED_CP, crossProduct), errcode); - __DAAL_VSLFN_CALL(fpk_vsl_sub_kernel, vsliSSEditTask, (task, __DAAL_VSL_SS_ED_CP_STORAGE, &cpStorage), errcode); - __DAAL_VSLFN_CALL(fpk_vsl_sub_kernel, vsldSSEditTask, (task, __DAAL_VSL_SS_ED_ACCUM_WEIGHT, accumWeightsAll), errcode); - - ThreadingFuncs threading = { _daal_mkl_threader_for_sequential, _daal_mkl_threader_for_ordered_sequential, - _daal_mkl_threader_sections_sequential, _daal_mkl_threader_ordered_sequential, - 
_daal_mkl_threader_get_max_threads_sequential }; - - __DAAL_VSLFN_CALL(fpk_vsl_kernel, dSSBasic, (task, __DAAL_VSL_SS_CP | __DAAL_VSL_SS_MEAN, method, &threading), errcode); - __DAAL_VSLFN_CALL(fpk_vsl_sub_kernel, vslSSDeleteTask, (&task), errcode); + __DAAL_VSLFN_CALL(vsldSSNewTask, (&task, (const MKL_INT *)&nFeatures, (const MKL_INT *)&nVectors, (const MKL_INT *)&dataStorage, data, 0, 0), + errcode); + __DAAL_VSLFN_CALL(vsldSSEditTask, (task, __DAAL_VSL_SS_ED_SUM, sum), errcode); + __DAAL_VSLFN_CALL(vsldSSEditTask, (task, __DAAL_VSL_SS_ED_MEAN, mean), errcode); + __DAAL_VSLFN_CALL(vsldSSEditTask, (task, __DAAL_VSL_SS_ED_WEIGHTS, weight), errcode); + __DAAL_VSLFN_CALL(vsldSSEditTask, (task, __DAAL_VSL_SS_ED_CP, crossProduct), errcode); + __DAAL_VSLFN_CALL(vsliSSEditTask, (task, __DAAL_VSL_SS_ED_CP_STORAGE, (const MKL_INT *)&cpStorage), errcode); + __DAAL_VSLFN_CALL(vsldSSEditTask, (task, __DAAL_VSL_SS_ED_ACCUM_WEIGHT, accumWeightsAll), errcode); + __DAAL_VSLFN_CALL(vsldSSCompute, (task, __DAAL_VSL_SS_CP | __DAAL_VSL_SS_MEAN, method), errcode); + __DAAL_VSLFN_CALL(vslSSDeleteTask, (&task), errcode); *accumWeight = accumWeightsAll[0]; @@ -253,7 +177,7 @@ struct MklStatistics static int xxvar_weight(double * data, __int64 nFeatures, __int64 nVectors, double * weight, double * accumWeight, double * mean, double * sampleVariance, __int64 method) { - DAAL_VSLSSTaskPtr task; + VSLSSTaskPtr task; int errcode = 0; __int64 dataStorage = __DAAL_VSL_SS_MATRIX_STORAGE_COLS; @@ -263,20 +187,16 @@ struct MklStatistics double accumWeightsAll[2] = { 0, 0 }; - __DAAL_VSLFN_CALL(fpk_vsl_sub_kernel, vsldSSNewTask, (&task, &nFeatures, &nVectors, &dataStorage, data, 0, 0, __SS_ILP_FLAG__), errcode); - __DAAL_VSLFN_CALL(fpk_vsl_sub_kernel, vsldSSEditTask, (task, __DAAL_VSL_SS_ED_SUM, sum), errcode); - __DAAL_VSLFN_CALL(fpk_vsl_sub_kernel, vsldSSEditTask, (task, __DAAL_VSL_SS_ED_MEAN, mean), errcode); - __DAAL_VSLFN_CALL(fpk_vsl_sub_kernel, vsldSSEditTask, (task, 
__DAAL_VSL_SS_ED_WEIGHTS, weight), errcode); - __DAAL_VSLFN_CALL(fpk_vsl_sub_kernel, vsldSSEditTask, (task, __DAAL_VSL_SS_ED_2C_SUM, sampleVariance), errcode); - __DAAL_VSLFN_CALL(fpk_vsl_sub_kernel, vsldSSEditTask, (task, __DAAL_VSL_SS_ED_2R_MOM, rawSecond), errcode); - __DAAL_VSLFN_CALL(fpk_vsl_sub_kernel, vsldSSEditTask, (task, __DAAL_VSL_SS_ED_ACCUM_WEIGHT, accumWeightsAll), errcode); - - ThreadingFuncs threading = { _daal_mkl_threader_for_sequential, _daal_mkl_threader_for_ordered_sequential, - _daal_mkl_threader_sections_sequential, _daal_mkl_threader_ordered_sequential, - _daal_mkl_threader_get_max_threads_sequential }; - - __DAAL_VSLFN_CALL(fpk_vsl_kernel, dSSBasic, (task, __DAAL_VSL_SS_2C_SUM | __DAAL_VSL_SS_MEAN, method, &threading), errcode); - __DAAL_VSLFN_CALL(fpk_vsl_sub_kernel, vslSSDeleteTask, (&task), errcode); + __DAAL_VSLFN_CALL(vsldSSNewTask, (&task, (const MKL_INT *)&nFeatures, (const MKL_INT *)&nVectors, (const MKL_INT *)&dataStorage, data, 0, 0), + errcode); + __DAAL_VSLFN_CALL(vsldSSEditTask, (task, __DAAL_VSL_SS_ED_SUM, sum), errcode); + __DAAL_VSLFN_CALL(vsldSSEditTask, (task, __DAAL_VSL_SS_ED_MEAN, mean), errcode); + __DAAL_VSLFN_CALL(vsldSSEditTask, (task, __DAAL_VSL_SS_ED_WEIGHTS, weight), errcode); + __DAAL_VSLFN_CALL(vsldSSEditTask, (task, __DAAL_VSL_SS_ED_2C_SUM, sampleVariance), errcode); + __DAAL_VSLFN_CALL(vsldSSEditTask, (task, __DAAL_VSL_SS_ED_2R_MOM, rawSecond), errcode); + __DAAL_VSLFN_CALL(vsldSSEditTask, (task, __DAAL_VSL_SS_ED_ACCUM_WEIGHT, accumWeightsAll), errcode); + __DAAL_VSLFN_CALL(vsldSSCompute, (task, __DAAL_VSL_SS_2C_SUM | __DAAL_VSL_SS_MEAN, method), errcode); + __DAAL_VSLFN_CALL(vslSSDeleteTask, (&task), errcode); *accumWeight = accumWeightsAll[0]; @@ -288,7 +208,7 @@ struct MklStatistics static int x2c_mom(const double * data, const __int64 nFeatures, const __int64 nVectors, double * variance, const __int64 method) { - DAAL_VSLSSTaskPtr task; + VSLSSTaskPtr task; int errcode = 0; __int64 dataStorage = 
__DAAL_VSL_SS_MATRIX_STORAGE_COLS; @@ -296,16 +216,13 @@ struct MklStatistics double * mean = (double *)daal::services::daal_malloc(nFeatures * sizeof(double)); double * secondOrderRawMoment = (double *)daal::services::daal_malloc(nFeatures * sizeof(double)); - __DAAL_VSLFN_CALL(fpk_vsl_sub_kernel, vsldSSNewTask, (&task, &nFeatures, &nVectors, &dataStorage, data, 0, 0, __SS_ILP_FLAG__), errcode); - __DAAL_VSLFN_CALL(fpk_vsl_sub_kernel, vsldSSEditTask, (task, __DAAL_VSL_SS_ED_MEAN, mean), errcode); - __DAAL_VSLFN_CALL(fpk_vsl_sub_kernel, vsldSSEditTask, (task, __DAAL_VSL_SS_ED_2R_MOM, secondOrderRawMoment), errcode); - __DAAL_VSLFN_CALL(fpk_vsl_sub_kernel, vsldSSEditTask, (task, __DAAL_VSL_SS_ED_2C_MOM, variance), errcode); - - ThreadingFuncs threading = { _daal_mkl_threader_for, _daal_mkl_threader_for_ordered, _daal_mkl_threader_sections, _daal_mkl_threader_ordered, - _daal_mkl_threader_get_max_threads }; - - __DAAL_VSLFN_CALL(fpk_vsl_kernel, dSSBasic, (task, __DAAL_VSL_SS_2C_MOM, method, &threading), errcode); - __DAAL_VSLFN_CALL(fpk_vsl_sub_kernel, vslSSDeleteTask, (&task), errcode); + __DAAL_VSLFN_CALL(vsldSSNewTask, (&task, (const MKL_INT *)&nFeatures, (const MKL_INT *)&nVectors, (const MKL_INT *)&dataStorage, data, 0, 0), + errcode); + __DAAL_VSLFN_CALL(vsldSSEditTask, (task, __DAAL_VSL_SS_ED_MEAN, mean), errcode); + __DAAL_VSLFN_CALL(vsldSSEditTask, (task, __DAAL_VSL_SS_ED_2R_MOM, secondOrderRawMoment), errcode); + __DAAL_VSLFN_CALL(vsldSSEditTask, (task, __DAAL_VSL_SS_ED_2C_MOM, variance), errcode); + __DAAL_VSLFN_CALL(vsldSSCompute, (task, __DAAL_VSL_SS_2C_MOM, method), errcode); + __DAAL_VSLFN_CALL(vslSSDeleteTask, (&task), errcode); daal::services::daal_free(mean); daal::services::daal_free(secondOrderRawMoment); @@ -317,164 +234,149 @@ struct MklStatistics static int xoutlierdetection(const double * data, const __int64 nFeatures, const __int64 nVectors, const __int64 nParams, const double * baconParams, double * baconWeights) { - DAAL_VSLSSTaskPtr task; 
+ VSLSSTaskPtr task; int errcode = 0; __int64 dataStorage = __DAAL_VSL_SS_MATRIX_STORAGE_COLS; - __DAAL_VSLFN_CALL(fpk_vsl_sub_kernel, vsldSSNewTask, (&task, &nFeatures, &nVectors, &dataStorage, data, 0, 0, __SS_ILP_FLAG__), errcode); - __DAAL_VSLFN_CALL(fpk_vsl_sub_kernel, vsldSSEditOutDetect, (task, &nParams, baconParams, baconWeights), errcode); - - ThreadingFuncs threading = { _daal_mkl_threader_for, _daal_mkl_threader_for_ordered, _daal_mkl_threader_sections, _daal_mkl_threader_ordered, - _daal_mkl_threader_get_max_threads }; - - __DAAL_VSLFN_CALL(fpk_vsl_kernel, dSSOutliersDetection, (task, __DAAL_VSL_SS_OUTLIERS, __DAAL_VSL_SS_METHOD_BACON, &threading), errcode); - __DAAL_VSLFN_CALL(fpk_vsl_sub_kernel, vslSSDeleteTask, (&task), errcode); + __DAAL_VSLFN_CALL(vsldSSNewTask, (&task, (const MKL_INT *)&nFeatures, (const MKL_INT *)&nVectors, (const MKL_INT *)&dataStorage, data, 0, 0), + errcode); + __DAAL_VSLFN_CALL(vsldSSEditOutliersDetection, (task, (const MKL_INT *)&nParams, baconParams, baconWeights), errcode); + __DAAL_VSLFN_CALL(vsldSSCompute, (task, __DAAL_VSL_SS_OUTLIERS, __DAAL_VSL_SS_METHOD_BACON), errcode); + __DAAL_VSLFN_CALL(vslSSDeleteTask, (&task), errcode); return errcode; } static int xLowOrderMoments(double * data, __int64 nFeatures, __int64 nVectors, __int64 method, double * sum, double * mean, double * secondOrderRawMoment, double * variance, double * variation) { - DAAL_VSLSSTaskPtr task; + VSLSSTaskPtr task; int errcode = 0; __int64 dataStorage = __DAAL_VSL_SS_MATRIX_STORAGE_COLS; - __DAAL_VSLFN_CALL(fpk_vsl_sub_kernel, vsldSSNewTask, (&task, &nFeatures, &nVectors, &dataStorage, data, 0, 0, __SS_ILP_FLAG__), errcode); - - __DAAL_VSLFN_CALL(fpk_vsl_sub_kernel, vsldSSEditTask, (task, __DAAL_VSL_SS_ED_SUM, sum), errcode); - __DAAL_VSLFN_CALL(fpk_vsl_sub_kernel, vsldSSEditTask, (task, __DAAL_VSL_SS_ED_MEAN, mean), errcode); - __DAAL_VSLFN_CALL(fpk_vsl_sub_kernel, vsldSSEditTask, (task, __DAAL_VSL_SS_ED_2R_MOM, secondOrderRawMoment), errcode); - 
__DAAL_VSLFN_CALL(fpk_vsl_sub_kernel, vsldSSEditTask, (task, __DAAL_VSL_SS_ED_2C_MOM, variance), errcode); - __DAAL_VSLFN_CALL(fpk_vsl_sub_kernel, vsldSSEditTask, (task, __DAAL_VSL_SS_ED_VARIATION, variation), errcode); - - ThreadingFuncs threading = { _daal_mkl_threader_for, _daal_mkl_threader_for_ordered, _daal_mkl_threader_sections, _daal_mkl_threader_ordered, - _daal_mkl_threader_get_max_threads }; - - __DAAL_VSLFN_CALL(fpk_vsl_kernel, dSSBasic, - (task, __DAAL_VSL_SS_SUM | __DAAL_VSL_SS_MEAN | __DAAL_VSL_SS_2R_MOM | __DAAL_VSL_SS_2C_MOM | __DAAL_VSL_SS_VARIATION, - method, &threading), + __DAAL_VSLFN_CALL(vsldSSNewTask, (&task, (const MKL_INT *)&nFeatures, (const MKL_INT *)&nVectors, (const MKL_INT *)&dataStorage, data, 0, 0), errcode); - __DAAL_VSLFN_CALL(fpk_vsl_sub_kernel, vslSSDeleteTask, (&task), errcode); + + __DAAL_VSLFN_CALL(vsldSSEditTask, (task, __DAAL_VSL_SS_ED_SUM, sum), errcode); + __DAAL_VSLFN_CALL(vsldSSEditTask, (task, __DAAL_VSL_SS_ED_MEAN, mean), errcode); + __DAAL_VSLFN_CALL(vsldSSEditTask, (task, __DAAL_VSL_SS_ED_2R_MOM, secondOrderRawMoment), errcode); + __DAAL_VSLFN_CALL(vsldSSEditTask, (task, __DAAL_VSL_SS_ED_2C_MOM, variance), errcode); + __DAAL_VSLFN_CALL(vsldSSEditTask, (task, __DAAL_VSL_SS_ED_VARIATION, variation), errcode); + __DAAL_VSLFN_CALL( + vsldSSCompute, + (task, __DAAL_VSL_SS_SUM | __DAAL_VSL_SS_MEAN | __DAAL_VSL_SS_2R_MOM | __DAAL_VSL_SS_2C_MOM | __DAAL_VSL_SS_VARIATION, method), errcode); + __DAAL_VSLFN_CALL(vslSSDeleteTask, (&task), errcode); return errcode; } static int xSumAndVariance(double * data, __int64 nFeatures, __int64 nVectors, double * nPreviousObservations, __int64 method, double * sum, double * mean, double * secondOrderRawMoment, double * variance) { - DAAL_VSLSSTaskPtr task; + VSLSSTaskPtr task; int errcode = 0; __int64 dataStorage = __DAAL_VSL_SS_MATRIX_STORAGE_COLS; - __DAAL_VSLFN_CALL(fpk_vsl_sub_kernel, vsldSSNewTask, (&task, &nFeatures, &nVectors, &dataStorage, data, 0, 0, __SS_ILP_FLAG__), errcode); + 
__DAAL_VSLFN_CALL(vsldSSNewTask, (&task, (const MKL_INT *)&nFeatures, (const MKL_INT *)&nVectors, (const MKL_INT *)&dataStorage, data, 0, 0), + errcode); double weight[2] = { *nPreviousObservations, *nPreviousObservations }; - __DAAL_VSLFN_CALL(fpk_vsl_sub_kernel, vsldSSEditTask, (task, __DAAL_VSL_SS_ED_SUM, sum), errcode); - __DAAL_VSLFN_CALL(fpk_vsl_sub_kernel, vsldSSEditTask, (task, __DAAL_VSL_SS_ED_MEAN, mean), errcode); - __DAAL_VSLFN_CALL(fpk_vsl_sub_kernel, vsldSSEditTask, (task, __DAAL_VSL_SS_ED_2R_MOM, secondOrderRawMoment), errcode); - __DAAL_VSLFN_CALL(fpk_vsl_sub_kernel, vsldSSEditTask, (task, __DAAL_VSL_SS_ED_2C_MOM, variance), errcode); - __DAAL_VSLFN_CALL(fpk_vsl_sub_kernel, vsldSSEditTask, (task, __DAAL_VSL_SS_ED_ACCUM_WEIGHT, weight), errcode); - - ThreadingFuncs threading = { _daal_mkl_threader_for, _daal_mkl_threader_for_ordered, _daal_mkl_threader_sections, _daal_mkl_threader_ordered, - _daal_mkl_threader_get_max_threads }; - - __DAAL_VSLFN_CALL(fpk_vsl_kernel, dSSBasic, - (task, __DAAL_VSL_SS_SUM | __DAAL_VSL_SS_MEAN | __DAAL_VSL_SS_2R_MOM | __DAAL_VSL_SS_2C_MOM, method, &threading), errcode); - __DAAL_VSLFN_CALL(fpk_vsl_sub_kernel, vslSSDeleteTask, (&task), errcode); + __DAAL_VSLFN_CALL(vsldSSEditTask, (task, __DAAL_VSL_SS_ED_SUM, sum), errcode); + __DAAL_VSLFN_CALL(vsldSSEditTask, (task, __DAAL_VSL_SS_ED_MEAN, mean), errcode); + __DAAL_VSLFN_CALL(vsldSSEditTask, (task, __DAAL_VSL_SS_ED_2R_MOM, secondOrderRawMoment), errcode); + __DAAL_VSLFN_CALL(vsldSSEditTask, (task, __DAAL_VSL_SS_ED_2C_MOM, variance), errcode); + __DAAL_VSLFN_CALL(vsldSSEditTask, (task, __DAAL_VSL_SS_ED_ACCUM_WEIGHT, weight), errcode); + __DAAL_VSLFN_CALL(vsldSSCompute, (task, __DAAL_VSL_SS_SUM | __DAAL_VSL_SS_MEAN | __DAAL_VSL_SS_2R_MOM | __DAAL_VSL_SS_2C_MOM, method), + errcode); + __DAAL_VSLFN_CALL(vslSSDeleteTask, (&task), errcode); return errcode; } static int xQuantiles(const double * data, const __int64 nFeatures, const __int64 nVectors, const __int64 quantOrderN, 
const double * quantOrder, double * quants) { - DAAL_VSLSSTaskPtr task; + VSLSSTaskPtr task; int errcode = 0; __int64 dataStorage = __DAAL_VSL_SS_MATRIX_STORAGE_COLS; - __DAAL_VSLFN_CALL(fpk_vsl_sub_kernel, vsldSSNewTask, (&task, &nFeatures, &nVectors, &dataStorage, data, 0, 0, __SS_ILP_FLAG__), errcode); + __DAAL_VSLFN_CALL(vsldSSNewTask, (&task, (const MKL_INT *)&nFeatures, (const MKL_INT *)&nVectors, (const MKL_INT *)&dataStorage, data, 0, 0), + errcode); if (errcode) { return errcode; } - __DAAL_VSLFN_CALL(fpk_vsl_sub_kernel, vsliSSEditTask, (task, __DAAL_VSL_SS_ED_QUANT_ORDER_N, &quantOrderN), errcode); + __DAAL_VSLFN_CALL(vsliSSEditTask, (task, __DAAL_VSL_SS_ED_QUANT_ORDER_N, (const MKL_INT *)&quantOrderN), errcode); if (errcode) { - __DAAL_VSLFN_CALL(fpk_vsl_sub_kernel, vslSSDeleteTask, (&task), errcode); + __DAAL_VSLFN_CALL(vslSSDeleteTask, (&task), errcode); return errcode; } - __DAAL_VSLFN_CALL(fpk_vsl_sub_kernel, vsldSSEditTask, (task, __DAAL_VSL_SS_ED_QUANT_ORDER, quantOrder), errcode); + __DAAL_VSLFN_CALL(vsldSSEditTask, (task, __DAAL_VSL_SS_ED_QUANT_ORDER, quantOrder), errcode); if (errcode) { - __DAAL_VSLFN_CALL(fpk_vsl_sub_kernel, vslSSDeleteTask, (&task), errcode); + __DAAL_VSLFN_CALL(vslSSDeleteTask, (&task), errcode); return errcode; } - __DAAL_VSLFN_CALL(fpk_vsl_sub_kernel, vsldSSEditTask, (task, __DAAL_VSL_SS_ED_QUANT_QUANTILES, quants), errcode); + __DAAL_VSLFN_CALL(vsldSSEditTask, (task, __DAAL_VSL_SS_ED_QUANT_QUANTILES, quants), errcode); if (errcode) { - __DAAL_VSLFN_CALL(fpk_vsl_sub_kernel, vslSSDeleteTask, (&task), errcode); + __DAAL_VSLFN_CALL(vslSSDeleteTask, (&task), errcode); return errcode; } - - ThreadingFuncs threading = { _daal_mkl_threader_for, _daal_mkl_threader_for_ordered, _daal_mkl_threader_sections, _daal_mkl_threader_ordered, - _daal_mkl_threader_get_max_threads }; - - __DAAL_VSLFN_CALL(fpk_vsl_kernel, dSSQuantiles, (task, __DAAL_VSL_SS_QUANTS, __DAAL_VSL_SS_METHOD_FAST, &threading), errcode); + 
__DAAL_VSLFN_CALL(vsldSSCompute, (task, __DAAL_VSL_SS_QUANTS, __DAAL_VSL_SS_METHOD_FAST), errcode); if (errcode) { - __DAAL_VSLFN_CALL(fpk_vsl_sub_kernel, vslSSDeleteTask, (&task), errcode); + __DAAL_VSLFN_CALL(vslSSDeleteTask, (&task), errcode); return errcode; } - __DAAL_VSLFN_CALL(fpk_vsl_sub_kernel, vslSSDeleteTask, (&task), errcode); + __DAAL_VSLFN_CALL(vslSSDeleteTask, (&task), errcode); return errcode; } static int xSort(double * data, __int64 nFeatures, __int64 nVectors, double * sortedData) { - DAAL_VSLSSTaskPtr task; + VSLSSTaskPtr task; int errcode = 0; __int64 inputStorage = __DAAL_VSL_SS_MATRIX_STORAGE_COLS; __int64 outputStorage = __DAAL_VSL_SS_MATRIX_STORAGE_COLS; - __DAAL_VSLFN_CALL(fpk_vsl_sub_kernel, vsldSSNewTask, (&task, &nFeatures, &nVectors, &inputStorage, data, 0, 0, __SS_ILP_FLAG__), errcode); + __DAAL_VSLFN_CALL(vsldSSNewTask, (&task, (const MKL_INT *)&nFeatures, (const MKL_INT *)&nVectors, (const MKL_INT *)&inputStorage, data, 0, 0), + errcode); if (errcode) { return errcode; } - __DAAL_VSLFN_CALL(fpk_vsl_sub_kernel, vsldSSEditTask, (task, __DAAL_VSL_SS_ED_SORTED_OBSERV, sortedData), errcode); + __DAAL_VSLFN_CALL(vsldSSEditTask, (task, __DAAL_VSL_SS_ED_SORTED_OBSERV, sortedData), errcode); if (errcode) { - __DAAL_VSLFN_CALL(fpk_vsl_sub_kernel, vslSSDeleteTask, (&task), errcode); + __DAAL_VSLFN_CALL(vslSSDeleteTask, (&task), errcode); return errcode; } - __DAAL_VSLFN_CALL(fpk_vsl_sub_kernel, vsliSSEditTask, (task, __DAAL_VSL_SS_ED_SORTED_OBSERV_STORAGE, &outputStorage), errcode); + __DAAL_VSLFN_CALL(vsliSSEditTask, (task, __DAAL_VSL_SS_ED_SORTED_OBSERV_STORAGE, (const MKL_INT *)&outputStorage), errcode); if (errcode) { - __DAAL_VSLFN_CALL(fpk_vsl_sub_kernel, vslSSDeleteTask, (&task), errcode); + __DAAL_VSLFN_CALL(vslSSDeleteTask, (&task), errcode); return errcode; } - ThreadingFuncs threading = { _daal_mkl_threader_for, _daal_mkl_threader_for_ordered, _daal_mkl_threader_sections, _daal_mkl_threader_ordered, - 
_daal_mkl_threader_get_max_threads }; - - __DAAL_VSLFN_CALL(fpk_vsl_kernel, dSSSort, (task, __DAAL_VSL_SS_SORTED_OBSERV, __DAAL_VSL_SS_METHOD_RADIX, &threading), errcode); + __DAAL_VSLFN_CALL(vsldSSCompute, (task, __DAAL_VSL_SS_SORTED_OBSERV, __DAAL_VSL_SS_METHOD_RADIX), errcode); if (errcode) { - __DAAL_VSLFN_CALL(fpk_vsl_sub_kernel, vslSSDeleteTask, (&task), errcode); + __DAAL_VSLFN_CALL(vslSSDeleteTask, (&task), errcode); return errcode; } - __DAAL_VSLFN_CALL(fpk_vsl_sub_kernel, vslSSDeleteTask, (&task), errcode); + __DAAL_VSLFN_CALL(vslSSDeleteTask, (&task), errcode); return errcode; } }; @@ -493,7 +395,7 @@ struct MklStatistics static int xcp(float * data, __int64 nFeatures, __int64 nVectors, float * nPreviousObservations, float * sum, float * crossProduct, __int64 method) { - DAAL_VSLSSTaskPtr task; + VSLSSTaskPtr task; int errcode = 0; __int64 dataStorage = __DAAL_VSL_SS_MATRIX_STORAGE_COLS; @@ -512,18 +414,15 @@ struct MklStatistics float weight[2] = { *nPreviousObservations, *nPreviousObservations }; - __DAAL_VSLFN_CALL(fpk_vsl_sub_kernel, vslsSSNewTask, (&task, &nFeatures, &nVectors, &dataStorage, data, 0, 0, __SS_ILP_FLAG__), errcode); - __DAAL_VSLFN_CALL(fpk_vsl_sub_kernel, vslsSSEditTask, (task, __DAAL_VSL_SS_ED_SUM, sum), errcode); - __DAAL_VSLFN_CALL(fpk_vsl_sub_kernel, vslsSSEditTask, (task, __DAAL_VSL_SS_ED_MEAN, mean), errcode); - __DAAL_VSLFN_CALL(fpk_vsl_sub_kernel, vslsSSEditTask, (task, __DAAL_VSL_SS_ED_CP, crossProduct), errcode); - __DAAL_VSLFN_CALL(fpk_vsl_sub_kernel, vsliSSEditTask, (task, __DAAL_VSL_SS_ED_CP_STORAGE, &cpStorage), errcode); - __DAAL_VSLFN_CALL(fpk_vsl_sub_kernel, vslsSSEditTask, (task, __DAAL_VSL_SS_ED_ACCUM_WEIGHT, weight), errcode); - - ThreadingFuncs threading = { _daal_mkl_threader_for, _daal_mkl_threader_for_ordered, _daal_mkl_threader_sections, _daal_mkl_threader_ordered, - _daal_mkl_threader_get_max_threads }; - - __DAAL_VSLFN_CALL(fpk_vsl_kernel, sSSBasic, (task, __DAAL_VSL_SS_CP | __DAAL_VSL_SS_SUM, method, 
&threading), errcode); - __DAAL_VSLFN_CALL(fpk_vsl_sub_kernel, vslSSDeleteTask, (&task), errcode); + __DAAL_VSLFN_CALL(vslsSSNewTask, (&task, (const MKL_INT *)&nFeatures, (const MKL_INT *)&nVectors, (MKL_INT *)&dataStorage, data, 0, 0), + errcode); + __DAAL_VSLFN_CALL(vslsSSEditTask, (task, __DAAL_VSL_SS_ED_SUM, sum), errcode); + __DAAL_VSLFN_CALL(vslsSSEditTask, (task, __DAAL_VSL_SS_ED_MEAN, mean), errcode); + __DAAL_VSLFN_CALL(vslsSSEditTask, (task, __DAAL_VSL_SS_ED_CP, crossProduct), errcode); + __DAAL_VSLFN_CALL(vsliSSEditTask, (task, __DAAL_VSL_SS_ED_CP_STORAGE, (const MKL_INT *)&cpStorage), errcode); + __DAAL_VSLFN_CALL(vslsSSEditTask, (task, __DAAL_VSL_SS_ED_ACCUM_WEIGHT, weight), errcode); + __DAAL_VSLFN_CALL(vslsSSCompute, (task, __DAAL_VSL_SS_CP | __DAAL_VSL_SS_SUM, method), errcode); + __DAAL_VSLFN_CALL(vslSSDeleteTask, (&task), errcode); daal::services::daal_free(mean); mean = NULL; @@ -533,7 +432,7 @@ struct MklStatistics static int xxcp_weight(float * data, __int64 nFeatures, __int64 nVectors, float * weight, float * accumWeight, float * mean, float * crossProduct, __int64 method) { - DAAL_VSLSSTaskPtr task; + VSLSSTaskPtr task; int errcode = 0; __int64 dataStorage = __DAAL_VSL_SS_MATRIX_STORAGE_COLS; @@ -543,20 +442,16 @@ struct MklStatistics float accumWeightsAll[2] = { 0, 0 }; - __DAAL_VSLFN_CALL(fpk_vsl_sub_kernel, vslsSSNewTask, (&task, &nFeatures, &nVectors, &dataStorage, data, 0, 0, __SS_ILP_FLAG__), errcode); - __DAAL_VSLFN_CALL(fpk_vsl_sub_kernel, vslsSSEditTask, (task, __DAAL_VSL_SS_ED_SUM, sum), errcode); - __DAAL_VSLFN_CALL(fpk_vsl_sub_kernel, vslsSSEditTask, (task, __DAAL_VSL_SS_ED_MEAN, mean), errcode); - __DAAL_VSLFN_CALL(fpk_vsl_sub_kernel, vslsSSEditTask, (task, __DAAL_VSL_SS_ED_WEIGHTS, weight), errcode); - __DAAL_VSLFN_CALL(fpk_vsl_sub_kernel, vslsSSEditTask, (task, __DAAL_VSL_SS_ED_CP, crossProduct), errcode); - __DAAL_VSLFN_CALL(fpk_vsl_sub_kernel, vsliSSEditTask, (task, __DAAL_VSL_SS_ED_CP_STORAGE, &cpStorage), errcode); - 
__DAAL_VSLFN_CALL(fpk_vsl_sub_kernel, vslsSSEditTask, (task, __DAAL_VSL_SS_ED_ACCUM_WEIGHT, accumWeightsAll), errcode); - - ThreadingFuncs threading = { _daal_mkl_threader_for_sequential, _daal_mkl_threader_for_ordered_sequential, - _daal_mkl_threader_sections_sequential, _daal_mkl_threader_ordered_sequential, - _daal_mkl_threader_get_max_threads_sequential }; - - __DAAL_VSLFN_CALL(fpk_vsl_kernel, sSSBasic, (task, __DAAL_VSL_SS_CP | __DAAL_VSL_SS_MEAN, method, &threading), errcode); - __DAAL_VSLFN_CALL(fpk_vsl_sub_kernel, vslSSDeleteTask, (&task), errcode); + __DAAL_VSLFN_CALL(vslsSSNewTask, (&task, (const MKL_INT *)&nFeatures, (const MKL_INT *)&nVectors, (const MKL_INT *)&dataStorage, data, 0, 0), + errcode); + __DAAL_VSLFN_CALL(vslsSSEditTask, (task, __DAAL_VSL_SS_ED_SUM, sum), errcode); + __DAAL_VSLFN_CALL(vslsSSEditTask, (task, __DAAL_VSL_SS_ED_MEAN, mean), errcode); + __DAAL_VSLFN_CALL(vslsSSEditTask, (task, __DAAL_VSL_SS_ED_WEIGHTS, weight), errcode); + __DAAL_VSLFN_CALL(vslsSSEditTask, (task, __DAAL_VSL_SS_ED_CP, crossProduct), errcode); + __DAAL_VSLFN_CALL(vsliSSEditTask, (task, __DAAL_VSL_SS_ED_CP_STORAGE, (const MKL_INT *)&cpStorage), errcode); + __DAAL_VSLFN_CALL(vslsSSEditTask, (task, __DAAL_VSL_SS_ED_ACCUM_WEIGHT, accumWeightsAll), errcode); + __DAAL_VSLFN_CALL(vslsSSCompute, (task, __DAAL_VSL_SS_CP | __DAAL_VSL_SS_MEAN, method), errcode); + __DAAL_VSLFN_CALL(vslSSDeleteTask, (&task), errcode); *accumWeight = accumWeightsAll[0]; @@ -568,7 +463,7 @@ struct MklStatistics static int xxvar_weight(float * data, __int64 nFeatures, __int64 nVectors, float * weight, float * accumWeight, float * mean, float * sampleVariance, __int64 method) { - DAAL_VSLSSTaskPtr task; + VSLSSTaskPtr task; int errcode = 0; __int64 dataStorage = __DAAL_VSL_SS_MATRIX_STORAGE_COLS; @@ -578,20 +473,16 @@ struct MklStatistics float accumWeightsAll[2] = { 0, 0 }; - __DAAL_VSLFN_CALL(fpk_vsl_sub_kernel, vslsSSNewTask, (&task, &nFeatures, &nVectors, &dataStorage, data, 0, 0, 
__SS_ILP_FLAG__), errcode); - __DAAL_VSLFN_CALL(fpk_vsl_sub_kernel, vslsSSEditTask, (task, __DAAL_VSL_SS_ED_SUM, sum), errcode); - __DAAL_VSLFN_CALL(fpk_vsl_sub_kernel, vslsSSEditTask, (task, __DAAL_VSL_SS_ED_MEAN, mean), errcode); - __DAAL_VSLFN_CALL(fpk_vsl_sub_kernel, vslsSSEditTask, (task, __DAAL_VSL_SS_ED_WEIGHTS, weight), errcode); - __DAAL_VSLFN_CALL(fpk_vsl_sub_kernel, vslsSSEditTask, (task, __DAAL_VSL_SS_ED_2C_SUM, sampleVariance), errcode); - __DAAL_VSLFN_CALL(fpk_vsl_sub_kernel, vslsSSEditTask, (task, __DAAL_VSL_SS_ED_2R_MOM, rawSecond), errcode); - __DAAL_VSLFN_CALL(fpk_vsl_sub_kernel, vslsSSEditTask, (task, __DAAL_VSL_SS_ED_ACCUM_WEIGHT, accumWeightsAll), errcode); - - ThreadingFuncs threading = { _daal_mkl_threader_for_sequential, _daal_mkl_threader_for_ordered_sequential, - _daal_mkl_threader_sections_sequential, _daal_mkl_threader_ordered_sequential, - _daal_mkl_threader_get_max_threads_sequential }; - - __DAAL_VSLFN_CALL(fpk_vsl_kernel, sSSBasic, (task, __DAAL_VSL_SS_2C_SUM | __DAAL_VSL_SS_MEAN, method, &threading), errcode); - __DAAL_VSLFN_CALL(fpk_vsl_sub_kernel, vslSSDeleteTask, (&task), errcode); + __DAAL_VSLFN_CALL(vslsSSNewTask, (&task, (const MKL_INT *)&nFeatures, (const MKL_INT *)&nVectors, (const MKL_INT *)&dataStorage, data, 0, 0), + errcode); + __DAAL_VSLFN_CALL(vslsSSEditTask, (task, __DAAL_VSL_SS_ED_SUM, sum), errcode); + __DAAL_VSLFN_CALL(vslsSSEditTask, (task, __DAAL_VSL_SS_ED_MEAN, mean), errcode); + __DAAL_VSLFN_CALL(vslsSSEditTask, (task, __DAAL_VSL_SS_ED_WEIGHTS, weight), errcode); + __DAAL_VSLFN_CALL(vslsSSEditTask, (task, __DAAL_VSL_SS_ED_2C_SUM, sampleVariance), errcode); + __DAAL_VSLFN_CALL(vslsSSEditTask, (task, __DAAL_VSL_SS_ED_2R_MOM, rawSecond), errcode); + __DAAL_VSLFN_CALL(vslsSSEditTask, (task, __DAAL_VSL_SS_ED_ACCUM_WEIGHT, accumWeightsAll), errcode); + __DAAL_VSLFN_CALL(vslsSSCompute, (task, __DAAL_VSL_SS_2C_SUM | __DAAL_VSL_SS_MEAN, method), errcode); + __DAAL_VSLFN_CALL(vslSSDeleteTask, (&task), errcode); 
*accumWeight = accumWeightsAll[0]; @@ -602,7 +493,7 @@ struct MklStatistics static int x2c_mom(const float * data, const __int64 nFeatures, const __int64 nVectors, float * variance, const __int64 method) { - DAAL_VSLSSTaskPtr task; + VSLSSTaskPtr task; int errcode = 0; __int64 dataStorage = __DAAL_VSL_SS_MATRIX_STORAGE_COLS; @@ -610,16 +501,13 @@ struct MklStatistics float * mean = (float *)daal::services::daal_malloc(nFeatures * sizeof(float)); float * secondOrderRawMoment = (float *)daal::services::daal_malloc(nFeatures * sizeof(float)); - __DAAL_VSLFN_CALL(fpk_vsl_sub_kernel, vslsSSNewTask, (&task, &nFeatures, &nVectors, &dataStorage, data, 0, 0, __SS_ILP_FLAG__), errcode); - __DAAL_VSLFN_CALL(fpk_vsl_sub_kernel, vslsSSEditTask, (task, __DAAL_VSL_SS_ED_MEAN, mean), errcode); - __DAAL_VSLFN_CALL(fpk_vsl_sub_kernel, vslsSSEditTask, (task, __DAAL_VSL_SS_ED_2R_MOM, secondOrderRawMoment), errcode); - __DAAL_VSLFN_CALL(fpk_vsl_sub_kernel, vslsSSEditTask, (task, __DAAL_VSL_SS_ED_2C_MOM, variance), errcode); - - ThreadingFuncs threading = { _daal_mkl_threader_for, _daal_mkl_threader_for_ordered, _daal_mkl_threader_sections, _daal_mkl_threader_ordered, - _daal_mkl_threader_get_max_threads }; - - __DAAL_VSLFN_CALL(fpk_vsl_kernel, sSSBasic, (task, __DAAL_VSL_SS_2C_MOM, method, &threading), errcode); - __DAAL_VSLFN_CALL(fpk_vsl_sub_kernel, vslSSDeleteTask, (&task), errcode); + __DAAL_VSLFN_CALL(vslsSSNewTask, (&task, (const MKL_INT *)&nFeatures, (const MKL_INT *)&nVectors, (const MKL_INT *)&dataStorage, data, 0, 0), + errcode); + __DAAL_VSLFN_CALL(vslsSSEditTask, (task, __DAAL_VSL_SS_ED_MEAN, mean), errcode); + __DAAL_VSLFN_CALL(vslsSSEditTask, (task, __DAAL_VSL_SS_ED_2R_MOM, secondOrderRawMoment), errcode); + __DAAL_VSLFN_CALL(vslsSSEditTask, (task, __DAAL_VSL_SS_ED_2C_MOM, variance), errcode); + __DAAL_VSLFN_CALL(vslsSSCompute, (task, __DAAL_VSL_SS_2C_MOM, method), errcode); + __DAAL_VSLFN_CALL(vslSSDeleteTask, (&task), errcode); daal::services::daal_free(mean); 
daal::services::daal_free(secondOrderRawMoment); @@ -632,164 +520,150 @@ struct MklStatistics static int xoutlierdetection(const float * data, const __int64 nFeatures, const __int64 nVectors, const __int64 nParams, const float * baconParams, float * baconWeights) { - DAAL_VSLSSTaskPtr task; + VSLSSTaskPtr task; int errcode = 0; __int64 dataStorage = __DAAL_VSL_SS_MATRIX_STORAGE_COLS; - __DAAL_VSLFN_CALL(fpk_vsl_sub_kernel, vslsSSNewTask, (&task, &nFeatures, &nVectors, &dataStorage, data, 0, 0, __SS_ILP_FLAG__), errcode); - __DAAL_VSLFN_CALL(fpk_vsl_sub_kernel, vslsSSEditOutDetect, (task, &nParams, baconParams, baconWeights), errcode); - - ThreadingFuncs threading = { _daal_mkl_threader_for, _daal_mkl_threader_for_ordered, _daal_mkl_threader_sections, _daal_mkl_threader_ordered, - _daal_mkl_threader_get_max_threads }; - - __DAAL_VSLFN_CALL(fpk_vsl_kernel, sSSOutliersDetection, (task, __DAAL_VSL_SS_OUTLIERS, __DAAL_VSL_SS_METHOD_BACON, &threading), errcode); - __DAAL_VSLFN_CALL(fpk_vsl_sub_kernel, vslSSDeleteTask, (&task), errcode); + __DAAL_VSLFN_CALL(vslsSSNewTask, (&task, (const MKL_INT *)&nFeatures, (const MKL_INT *)&nVectors, (const MKL_INT *)&dataStorage, data, 0, 0), + errcode); + __DAAL_VSLFN_CALL(vslsSSEditOutliersDetection, (task, (const MKL_INT *)&nParams, baconParams, baconWeights), errcode); + __DAAL_VSLFN_CALL(vslsSSCompute, (task, __DAAL_VSL_SS_OUTLIERS, __DAAL_VSL_SS_METHOD_BACON), errcode); + __DAAL_VSLFN_CALL(vslSSDeleteTask, (&task), errcode); return errcode; } static int xLowOrderMoments(float * data, __int64 nFeatures, __int64 nVectors, __int64 method, float * sum, float * mean, float * secondOrderRawMoment, float * variance, float * variation) { - DAAL_VSLSSTaskPtr task; + VSLSSTaskPtr task; int errcode = 0; __int64 dataStorage = __DAAL_VSL_SS_MATRIX_STORAGE_COLS; - __DAAL_VSLFN_CALL(fpk_vsl_sub_kernel, vslsSSNewTask, (&task, &nFeatures, &nVectors, &dataStorage, data, 0, 0, __SS_ILP_FLAG__), errcode); - - __DAAL_VSLFN_CALL(fpk_vsl_sub_kernel, 
vslsSSEditTask, (task, __DAAL_VSL_SS_ED_SUM, sum), errcode); - __DAAL_VSLFN_CALL(fpk_vsl_sub_kernel, vslsSSEditTask, (task, __DAAL_VSL_SS_ED_MEAN, mean), errcode); - __DAAL_VSLFN_CALL(fpk_vsl_sub_kernel, vslsSSEditTask, (task, __DAAL_VSL_SS_ED_2R_MOM, secondOrderRawMoment), errcode); - __DAAL_VSLFN_CALL(fpk_vsl_sub_kernel, vslsSSEditTask, (task, __DAAL_VSL_SS_ED_2C_MOM, variance), errcode); - __DAAL_VSLFN_CALL(fpk_vsl_sub_kernel, vslsSSEditTask, (task, __DAAL_VSL_SS_ED_VARIATION, variation), errcode); - - ThreadingFuncs threading = { _daal_mkl_threader_for, _daal_mkl_threader_for_ordered, _daal_mkl_threader_sections, _daal_mkl_threader_ordered, - _daal_mkl_threader_get_max_threads }; - - __DAAL_VSLFN_CALL(fpk_vsl_kernel, sSSBasic, - (task, __DAAL_VSL_SS_SUM | __DAAL_VSL_SS_MEAN | __DAAL_VSL_SS_2R_MOM | __DAAL_VSL_SS_2C_MOM | __DAAL_VSL_SS_VARIATION, - method, &threading), + __DAAL_VSLFN_CALL(vslsSSNewTask, (&task, (const MKL_INT *)&nFeatures, (const MKL_INT *)&nVectors, (const MKL_INT *)&dataStorage, data, 0, 0), errcode); - __DAAL_VSLFN_CALL(fpk_vsl_sub_kernel, vslSSDeleteTask, (&task), errcode); + + __DAAL_VSLFN_CALL(vslsSSEditTask, (task, __DAAL_VSL_SS_ED_SUM, sum), errcode); + __DAAL_VSLFN_CALL(vslsSSEditTask, (task, __DAAL_VSL_SS_ED_MEAN, mean), errcode); + __DAAL_VSLFN_CALL(vslsSSEditTask, (task, __DAAL_VSL_SS_ED_2R_MOM, secondOrderRawMoment), errcode); + __DAAL_VSLFN_CALL(vslsSSEditTask, (task, __DAAL_VSL_SS_ED_2C_MOM, variance), errcode); + __DAAL_VSLFN_CALL(vslsSSEditTask, (task, __DAAL_VSL_SS_ED_VARIATION, variation), errcode); + __DAAL_VSLFN_CALL( + vslsSSCompute, + (task, __DAAL_VSL_SS_SUM | __DAAL_VSL_SS_MEAN | __DAAL_VSL_SS_2R_MOM | __DAAL_VSL_SS_2C_MOM | __DAAL_VSL_SS_VARIATION, method), errcode); + __DAAL_VSLFN_CALL(vslSSDeleteTask, (&task), errcode); return errcode; } static int xSumAndVariance(float * data, __int64 nFeatures, __int64 nVectors, float * nPreviousObservations, __int64 method, float * sum, float * mean, float * secondOrderRawMoment, 
float * variance) { - DAAL_VSLSSTaskPtr task; + VSLSSTaskPtr task; int errcode = 0; __int64 dataStorage = __DAAL_VSL_SS_MATRIX_STORAGE_COLS; - __DAAL_VSLFN_CALL(fpk_vsl_sub_kernel, vslsSSNewTask, (&task, &nFeatures, &nVectors, &dataStorage, data, 0, 0, __SS_ILP_FLAG__), errcode); + __DAAL_VSLFN_CALL(vslsSSNewTask, (&task, (const MKL_INT *)&nFeatures, (const MKL_INT *)&nVectors, (const MKL_INT *)&dataStorage, data, 0, 0), + errcode); float weight[2] = { *nPreviousObservations, *nPreviousObservations }; - __DAAL_VSLFN_CALL(fpk_vsl_sub_kernel, vslsSSEditTask, (task, __DAAL_VSL_SS_ED_SUM, sum), errcode); - __DAAL_VSLFN_CALL(fpk_vsl_sub_kernel, vslsSSEditTask, (task, __DAAL_VSL_SS_ED_MEAN, mean), errcode); - __DAAL_VSLFN_CALL(fpk_vsl_sub_kernel, vslsSSEditTask, (task, __DAAL_VSL_SS_ED_2R_MOM, secondOrderRawMoment), errcode); - __DAAL_VSLFN_CALL(fpk_vsl_sub_kernel, vslsSSEditTask, (task, __DAAL_VSL_SS_ED_2C_MOM, variance), errcode); - __DAAL_VSLFN_CALL(fpk_vsl_sub_kernel, vslsSSEditTask, (task, __DAAL_VSL_SS_ED_ACCUM_WEIGHT, weight), errcode); - - ThreadingFuncs threading = { _daal_mkl_threader_for, _daal_mkl_threader_for_ordered, _daal_mkl_threader_sections, _daal_mkl_threader_ordered, - _daal_mkl_threader_get_max_threads }; - - __DAAL_VSLFN_CALL(fpk_vsl_kernel, sSSBasic, - (task, __DAAL_VSL_SS_SUM | __DAAL_VSL_SS_MEAN | __DAAL_VSL_SS_2R_MOM | __DAAL_VSL_SS_2C_MOM, method, &threading), errcode); - __DAAL_VSLFN_CALL(fpk_vsl_sub_kernel, vslSSDeleteTask, (&task), errcode); + __DAAL_VSLFN_CALL(vslsSSEditTask, (task, __DAAL_VSL_SS_ED_SUM, sum), errcode); + __DAAL_VSLFN_CALL(vslsSSEditTask, (task, __DAAL_VSL_SS_ED_MEAN, mean), errcode); + __DAAL_VSLFN_CALL(vslsSSEditTask, (task, __DAAL_VSL_SS_ED_2R_MOM, secondOrderRawMoment), errcode); + __DAAL_VSLFN_CALL(vslsSSEditTask, (task, __DAAL_VSL_SS_ED_2C_MOM, variance), errcode); + __DAAL_VSLFN_CALL(vslsSSEditTask, (task, __DAAL_VSL_SS_ED_ACCUM_WEIGHT, weight), errcode); + __DAAL_VSLFN_CALL(vslsSSCompute, (task, __DAAL_VSL_SS_SUM | 
__DAAL_VSL_SS_MEAN | __DAAL_VSL_SS_2R_MOM | __DAAL_VSL_SS_2C_MOM, method), + errcode); + __DAAL_VSLFN_CALL(vslSSDeleteTask, (&task), errcode); return errcode; } static int xQuantiles(const float * data, const __int64 nFeatures, const __int64 nVectors, const __int64 quantOrderN, const float * quantOrder, float * quants) { - DAAL_VSLSSTaskPtr task; + VSLSSTaskPtr task; int errcode = 0; __int64 dataStorage = __DAAL_VSL_SS_MATRIX_STORAGE_COLS; - __DAAL_VSLFN_CALL(fpk_vsl_sub_kernel, vslsSSNewTask, (&task, &nFeatures, &nVectors, &dataStorage, data, 0, 0, __SS_ILP_FLAG__), errcode); + __DAAL_VSLFN_CALL(vslsSSNewTask, (&task, (const MKL_INT *)&nFeatures, (const MKL_INT *)&nVectors, (const MKL_INT *)&dataStorage, data, 0, 0), + errcode); if (errcode) { return errcode; } - __DAAL_VSLFN_CALL(fpk_vsl_sub_kernel, vsliSSEditTask, (task, __DAAL_VSL_SS_ED_QUANT_ORDER_N, &quantOrderN), errcode); + __DAAL_VSLFN_CALL(vsliSSEditTask, (task, __DAAL_VSL_SS_ED_QUANT_ORDER_N, (const MKL_INT *)&quantOrderN), errcode); if (errcode) { - __DAAL_VSLFN_CALL(fpk_vsl_sub_kernel, vslSSDeleteTask, (&task), errcode); + __DAAL_VSLFN_CALL(vslSSDeleteTask, (&task), errcode); return errcode; } - __DAAL_VSLFN_CALL(fpk_vsl_sub_kernel, vslsSSEditTask, (task, __DAAL_VSL_SS_ED_QUANT_ORDER, quantOrder), errcode); + __DAAL_VSLFN_CALL(vslsSSEditTask, (task, __DAAL_VSL_SS_ED_QUANT_ORDER, quantOrder), errcode); if (errcode) { - __DAAL_VSLFN_CALL(fpk_vsl_sub_kernel, vslSSDeleteTask, (&task), errcode); + __DAAL_VSLFN_CALL(vslSSDeleteTask, (&task), errcode); return errcode; } - __DAAL_VSLFN_CALL(fpk_vsl_sub_kernel, vslsSSEditTask, (task, __DAAL_VSL_SS_ED_QUANT_QUANTILES, quants), errcode); + __DAAL_VSLFN_CALL(vslsSSEditTask, (task, __DAAL_VSL_SS_ED_QUANT_QUANTILES, quants), errcode); if (errcode) { - __DAAL_VSLFN_CALL(fpk_vsl_sub_kernel, vslSSDeleteTask, (&task), errcode); + __DAAL_VSLFN_CALL(vslSSDeleteTask, (&task), errcode); return errcode; } - ThreadingFuncs threading = { _daal_mkl_threader_for, 
_daal_mkl_threader_for_ordered, _daal_mkl_threader_sections, _daal_mkl_threader_ordered, - _daal_mkl_threader_get_max_threads }; - - __DAAL_VSLFN_CALL(fpk_vsl_kernel, sSSQuantiles, (task, __DAAL_VSL_SS_QUANTS, __DAAL_VSL_SS_METHOD_FAST, &threading), errcode); + __DAAL_VSLFN_CALL(vslsSSCompute, (task, __DAAL_VSL_SS_QUANTS, __DAAL_VSL_SS_METHOD_FAST), errcode); if (errcode) { - __DAAL_VSLFN_CALL(fpk_vsl_sub_kernel, vslSSDeleteTask, (&task), errcode); + __DAAL_VSLFN_CALL(vslSSDeleteTask, (&task), errcode); return errcode; } - __DAAL_VSLFN_CALL(fpk_vsl_sub_kernel, vslSSDeleteTask, (&task), errcode); + __DAAL_VSLFN_CALL(vslSSDeleteTask, (&task), errcode); return errcode; } static int xSort(float * data, __int64 nFeatures, __int64 nVectors, float * sortedData) { - DAAL_VSLSSTaskPtr task; + VSLSSTaskPtr task; int errcode = 0; __int64 inputStorage = __DAAL_VSL_SS_MATRIX_STORAGE_COLS; __int64 outputStorage = __DAAL_VSL_SS_MATRIX_STORAGE_COLS; - __DAAL_VSLFN_CALL(fpk_vsl_sub_kernel, vslsSSNewTask, (&task, &nFeatures, &nVectors, &inputStorage, data, 0, 0, __SS_ILP_FLAG__), errcode); + __DAAL_VSLFN_CALL(vslsSSNewTask, (&task, (const MKL_INT *)&nFeatures, (const MKL_INT *)&nVectors, (const MKL_INT *)&inputStorage, data, 0, 0), + errcode); if (errcode) { return errcode; } - __DAAL_VSLFN_CALL(fpk_vsl_sub_kernel, vslsSSEditTask, (task, __DAAL_VSL_SS_ED_SORTED_OBSERV, sortedData), errcode); + __DAAL_VSLFN_CALL(vslsSSEditTask, (task, __DAAL_VSL_SS_ED_SORTED_OBSERV, sortedData), errcode); if (errcode) { - __DAAL_VSLFN_CALL(fpk_vsl_sub_kernel, vslSSDeleteTask, (&task), errcode); + __DAAL_VSLFN_CALL(vslSSDeleteTask, (&task), errcode); return errcode; } - __DAAL_VSLFN_CALL(fpk_vsl_sub_kernel, vsliSSEditTask, (task, __DAAL_VSL_SS_ED_SORTED_OBSERV_STORAGE, &outputStorage), errcode); + __DAAL_VSLFN_CALL(vsliSSEditTask, (task, __DAAL_VSL_SS_ED_SORTED_OBSERV_STORAGE, (const MKL_INT *)&outputStorage), errcode); if (errcode) { - __DAAL_VSLFN_CALL(fpk_vsl_sub_kernel, vslSSDeleteTask, (&task), 
errcode); + __DAAL_VSLFN_CALL(vslSSDeleteTask, (&task), errcode); return errcode; } - ThreadingFuncs threading = { _daal_mkl_threader_for, _daal_mkl_threader_for_ordered, _daal_mkl_threader_sections, _daal_mkl_threader_ordered, - _daal_mkl_threader_get_max_threads }; - - __DAAL_VSLFN_CALL(fpk_vsl_kernel, sSSSort, (task, __DAAL_VSL_SS_SORTED_OBSERV, __DAAL_VSL_SS_METHOD_RADIX, &threading), errcode); + __DAAL_VSLFN_CALL(vslsSSCompute, (task, __DAAL_VSL_SS_SORTED_OBSERV, __DAAL_VSL_SS_METHOD_RADIX), errcode); if (errcode) { - __DAAL_VSLFN_CALL(fpk_vsl_sub_kernel, vslSSDeleteTask, (&task), errcode); + __DAAL_VSLFN_CALL(vslSSDeleteTask, (&task), errcode); return errcode; } - __DAAL_VSLFN_CALL(fpk_vsl_sub_kernel, vslSSDeleteTask, (&task), errcode); + __DAAL_VSLFN_CALL(vslSSDeleteTask, (&task), errcode); return errcode; } }; diff --git a/cpp/daal/src/externals/service_stat_ref.h b/cpp/daal/src/externals/service_stat_ref.h index 0ff35505527..e37a067f146 100644 --- a/cpp/daal/src/externals/service_stat_ref.h +++ b/cpp/daal/src/externals/service_stat_ref.h @@ -72,81 +72,6 @@ extern "C" #define __DAAL_VSL_SS_ERROR_BAD_QUANT_ORDER -4022 #define __DAAL_VSL_SS_ERROR_INDICES_NOT_SUPPORTED -4085 - - typedef void (*threadfuncfor)(DAAL_INT, DAAL_INT, void *, func_type); - typedef void (*threadfuncforordered)(DAAL_INT, DAAL_INT, void *, func_type); - typedef void (*threadfuncsection)(DAAL_INT, void *, func_type); - typedef void (*threadfuncordered)(DAAL_INT, DAAL_INT, DAAL_INT, void *, func_type); - typedef DAAL_INT (*threadgetlimit)(void); - - struct ThreadingFuncs - { - threadfuncfor funcfor; - threadfuncfor funcforordered; - threadfuncsection funcsection; - threadfuncordered funcordered; - threadgetlimit getlimit; - }; - - [[maybe_unused]] static void _daal_mkl_threader_for_sequential(DAAL_INT n, DAAL_INT threads_request, void * a, func_type func) - { - DAAL_INT i; - - for (i = 0; i < n; i++) - { - func(i, 0, 1, a); - } - } - - [[maybe_unused]] static void 
_daal_mkl_threader_for_ordered_sequential(DAAL_INT n, DAAL_INT threads_request, void * a, func_type func) - { - DAAL_INT i; - - for (i = 0; i < n; i++) - { - func(i, 0, 1, a); - } - } - - [[maybe_unused]] static void _daal_mkl_threader_sections_sequential(DAAL_INT threads_request, void * a, func_type func) - { - func(0, 0, 1, a); - } - - [[maybe_unused]] static void _daal_mkl_threader_ordered_sequential(DAAL_INT i, DAAL_INT th_idx, DAAL_INT th_num, void * a, func_type func) - { - func(i, th_idx, th_num, a); - } - - [[maybe_unused]] static DAAL_INT _daal_mkl_threader_get_max_threads_sequential() - { - return 1; - } - - [[maybe_unused]] static void _daal_mkl_threader_for(DAAL_INT n, DAAL_INT threads_request, void * a, func_type func) - { - //fpk_vsl_serv_threader_for(n, threads_request, a, func); - } - - [[maybe_unused]] static void _daal_mkl_threader_for_ordered(DAAL_INT n, DAAL_INT threads_request, void * a, func_type func) - { - //fpk_vsl_serv_threader_for_ordered(n, threads_request, a, func); - } - - [[maybe_unused]] static void _daal_mkl_threader_sections(DAAL_INT threads_request, void * a, func_type func) - { - //fpk_vsl_serv_threader_sections(threads_request, a, func); - } - - [[maybe_unused]] static void _daal_mkl_threader_ordered(DAAL_INT i, DAAL_INT th_idx, DAAL_INT th_num, void * a, func_type func) - { - //not used. To be implemented if needed. 
- } - - [[maybe_unused]] static DAAL_INT _daal_mkl_threader_get_max_threads() - { - return 1; - } } namespace daal diff --git a/cpp/daal/src/externals/service_stat_rng_mkl.h b/cpp/daal/src/externals/service_stat_rng_mkl.h index d0719eeeaba..f3c8f05758b 100644 --- a/cpp/daal/src/externals/service_stat_rng_mkl.h +++ b/cpp/daal/src/externals/service_stat_rng_mkl.h @@ -24,22 +24,15 @@ #ifndef __SERVICE_STAT_RNG_MKL_H__ #define __SERVICE_STAT_RNG_MKL_H__ -#if !defined(__DAAL_CONCAT5) - #define __DAAL_CONCAT5(a, b, c, d, e) __DAAL_CONCAT51(a, b, c, d, e) - #define __DAAL_CONCAT51(a, b, c, d, e) a##b##c##d##e -#endif - -#define __DAAL_VSLFN(f_cpu, f_pref, f_name) __DAAL_CONCAT5(f_pref, _, f_cpu, _, f_name) -#define __DAAL_VSLFN_CALL(f_pref, f_name, f_args, errcode) __DAAL_VSLFN_CALL1(f_pref, f_name, f_args, errcode) -#define __DAAL_VSLFN_CALL_NR(f_pref, f_name, f_args, errcode) __DAAL_VSLFN_CALL2(f_pref, f_name, f_args, errcode) -#define __DAAL_VSLFN_CALL_NR_WHILE(f_pref, f_name, f_args, errcode) \ +#define __DAAL_VSLFN_CALL_NR(f_name, f_args, errcode) __DAAL_VSLFN_CALL_NO_V(f_name, f_args, errcode) +#define __DAAL_VSLFN_CALL_NR_WHILE(f_name, f_args, errcode) \ { \ size_t nn_left = n; \ while (nn_left > 0) \ { \ nn = (nn_left > 0xFFFFFFFL) ? 
0xFFFFFFF : (int)(nn_left); \ \ - __DAAL_VSLFN_CALL2(f_pref, f_name, f_args, errcode); \ + __DAAL_VSLFN_CALL_V(f_name, f_args, errcode); \ if (errcode < 0) return errcode; \ \ rr += nn; \ @@ -47,51 +40,14 @@ } \ } -#if defined(__APPLE__) - #define __DAAL_MKLVSL_SSE2 e9 - #define __DAAL_MKLVSL_SSE42 e9 -#else - #define __DAAL_MKLVSL_SSE2 ex - #define __DAAL_MKLVSL_SSE42 h8 -#endif - -#define __DAAL_VSLFN_CALL1(f_pref, f_name, f_args, errcode) \ - if (avx512 == cpu) \ - { \ - errcode = __DAAL_VSLFN(z0, f_pref, f_name) f_args; \ - } \ - if (avx2 == cpu) \ - { \ - errcode = __DAAL_VSLFN(l9, f_pref, f_name) f_args; \ - } \ - if (sse42 == cpu) \ - { \ - errcode = __DAAL_VSLFN(__DAAL_MKLVSL_SSE42, f_pref, f_name) f_args; \ - } \ - if (sse2 == cpu) \ - { \ - errcode = __DAAL_VSLFN(__DAAL_MKLVSL_SSE2, f_pref, f_name) f_args; \ - } \ - if (errcode != 0) \ - { \ - return errcode; \ +#define __DAAL_VSLFN_CALL_V(f_name, f_args, retcode) \ + { \ + retcode = v##f_name f_args; \ } -#define __DAAL_VSLFN_CALL2(f_pref, f_name, f_args, retcode) \ - if (avx512 == cpu) \ - { \ - retcode = __DAAL_VSLFN(z0, f_pref, f_name) f_args; \ - } \ - if (avx2 == cpu) \ - { \ - retcode = __DAAL_VSLFN(l9, f_pref, f_name) f_args; \ - } \ - if (sse42 == cpu) \ - { \ - retcode = __DAAL_VSLFN(__DAAL_MKLVSL_SSE42, f_pref, f_name) f_args; \ - } \ - if (sse2 == cpu) \ - { \ - retcode = __DAAL_VSLFN(__DAAL_MKLVSL_SSE2, f_pref, f_name) f_args; \ + +#define __DAAL_VSLFN_CALL_NO_V(f_name, f_args, retcode) \ + { \ + retcode = f_name f_args; \ } #endif diff --git a/cpp/daal/src/externals/service_stat_rng_ref.h b/cpp/daal/src/externals/service_stat_rng_ref.h index 182d16ca8b1..eb5526242a7 100644 --- a/cpp/daal/src/externals/service_stat_rng_ref.h +++ b/cpp/daal/src/externals/service_stat_rng_ref.h @@ -27,11 +27,6 @@ #include "src/externals/service_stat_rng_ref.h" -#if !defined(__DAAL_CONCAT2) - #define __DAAL_CONCAT2(a, b) a##b -#endif - -#define __DAAL_VSLFN(f_pref, f_name) __DAAL_CONCAT2(f_pref, f_name) #define 
__DAAL_VSLFN_CALL_NR(f_pref, f_name, f_args, errcode) __DAAL_VSLFN_CALL(f_pref, f_name, f_args, errcode) #define __DAAL_VSLFN_CALL_NR_WHILE(f_pref, f_name, f_args, errcode) \ { \ diff --git a/cpp/daal/src/externals/istrconv_daal_el.h b/cpp/daal/src/externals/service_thread_declar_mkl.cpp old mode 100755 new mode 100644 similarity index 60% rename from cpp/daal/src/externals/istrconv_daal_el.h rename to cpp/daal/src/externals/service_thread_declar_mkl.cpp index 32d0ef187ee..b94fc76cdda --- a/cpp/daal/src/externals/istrconv_daal_el.h +++ b/cpp/daal/src/externals/service_thread_declar_mkl.cpp @@ -1,5 +1,6 @@ +/* file: service_thread_declar_mkl.cpp */ /******************************************************************************* -* Copyright 2014 Intel Corporation +* Copyright contributors to the oneDAL project * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -14,22 +15,18 @@ * limitations under the License. *******************************************************************************/ -// -// Abstract: -// -// External header file for libistrconv. -// -// ============================================================================= - -#ifndef _ISTRCONV_EL_H_ -#define _ISTRCONV_EL_H_ - -#if defined(__cplusplus) - #define _ISTRCONV_EXTERN_C extern "C" -#else - #define _ISTRCONV_EXTERN_C extern -#endif - -_ISTRCONV_EXTERN_C int __FPK_string_to_int_generic(const char * nptr, char ** endptr); - -#endif /*_ISTRCONV_H_*/ +namespace daal +{ +namespace internal +{ +namespace mkl +{ +//This is a placeholder; the real calls are made in the xfunctions. 
+//TODO: add correct threading control +int mkl_serv_set_num_threads_local(int nthreads) +{ + return nthreads; +} +} // namespace mkl +} // namespace internal +} // namespace daal diff --git a/cpp/oneapi/dal/backend/micromkl/micromkl.hpp b/cpp/daal/src/externals/service_thread_declar_mkl.h similarity index 58% rename from cpp/oneapi/dal/backend/micromkl/micromkl.hpp rename to cpp/daal/src/externals/service_thread_declar_mkl.h index 6f64b784c93..9ea71b4aa16 100644 --- a/cpp/oneapi/dal/backend/micromkl/micromkl.hpp +++ b/cpp/daal/src/externals/service_thread_declar_mkl.h @@ -1,5 +1,6 @@ +/* file: service_thread_declar_mkl.h */ /******************************************************************************* -* Copyright 2021 Intel Corporation +* Copyright contributors to the oneDAL project * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -14,23 +15,14 @@ * limitations under the License. *******************************************************************************/ -#pragma once - -#include "oneapi/dal/common.hpp" - -namespace oneapi::dal::backend::micromkl { - -template <typename Float> -void syevd(char jobz, - char uplo, - std::int64_t n, - Float* a, - std::int64_t lda, - Float* w, - Float* work, - std::int64_t lwork, - std::int64_t* iwork, - std::int64_t liwork, - std::int64_t& info); - -} // namespace oneapi::dal::backend::micromkl +namespace daal +{ +namespace internal +{ +namespace mkl +{ +//This is a placeholder; the real calls are made in the xfunctions. 
+int mkl_serv_set_num_threads_local(int nthreads); +} // namespace mkl +} // namespace internal +} // namespace daal diff --git a/cpp/daal/src/services/library_version_info.cpp b/cpp/daal/src/services/library_version_info.cpp index 75c5169998c..513afe181ee 100644 --- a/cpp/daal/src/services/library_version_info.cpp +++ b/cpp/daal/src/services/library_version_info.cpp @@ -27,7 +27,7 @@ #include "services/env_detect.h" #ifndef DAAL_REF // temporary!!! should depend both on BACKEND and TARGETARCH - #include "mkl_daal.h" + #include <mkl.h> static const char * cpu_long_names[] = { "Generic", "Supplemental Streaming SIMD Extensions 3", "Intel(R) Streaming SIMD Extensions 4.2", @@ -50,8 +50,7 @@ DAAL_EXPORT daal::services::LibraryVersionInfo::LibraryVersionInfo() build_rev(BUILD_REV), name(PRODUCT_NAME_STR), #ifndef DAAL_REF - // fpk_serv_cpuisknm might be instantiated from backed like other MKL functions - processor(cpu_long_names[daal::services::Environment::getInstance()->getCpuId() + 2 * fpk_serv_cpuisknm()]) + processor(cpu_long_names[daal::services::Environment::getInstance()->getCpuId()]) #else processor(cpu_long_names[0]) #endif diff --git a/cpp/daal/src/services/service_algo_utils.cpp b/cpp/daal/src/services/service_algo_utils.cpp index ca81000067c..eb3cc8b1e3e 100755 --- a/cpp/daal/src/services/service_algo_utils.cpp +++ b/cpp/daal/src/services/service_algo_utils.cpp @@ -21,9 +21,10 @@ //-- */ +#include "src/services/service_utils.h" +#include "src/services/service_algo_utils.h" #include "services/error_indexes.h" #include "services/error_handling.h" -#include "src/services/service_algo_utils.h" namespace daal { diff --git a/cpp/daal/src/services/service_topo.cpp b/cpp/daal/src/services/service_topo.cpp index 230124af36f..adaccc7f983 100644 --- a/cpp/daal/src/services/service_topo.cpp +++ b/cpp/daal/src/services/service_topo.cpp @@ -1817,14 +1817,14 @@ unsigned _internal_daal_GetSysLogicalProcessorCount() } /* - * _internal_daal_GetSysProcessorCoreCount + * 
_internal_daal_GetProcessorCoreCount * * Returns count of processor cores in the system that were enumerated by this app * * Arguments: None * Return: Number of physical processors or 0 if number can not be calculated */ -unsigned _internal_daal_GetSysProcessorCoreCount() +unsigned _internal_daal_GetProcessorCoreCount() { if (!glbl_obj.isInit) __internal_daal_initCpuTopology(); @@ -1870,7 +1870,7 @@ unsigned _internal_daal_GetCoreCountPerEachCache(unsigned subleaf, unsigned cach unsigned _internal_daal_GetLogicalProcessorQueue(int * queue) { const int cpus = _internal_daal_GetSysLogicalProcessorCount(); - int cores = _internal_daal_GetSysProcessorCoreCount(); + int cores = _internal_daal_GetProcessorCoreCount(); if (cores == 0) cores = 1; diff --git a/cpp/daal/src/services/service_topo.h b/cpp/daal/src/services/service_topo.h index c2c0ad142d9..f60bcef199d 100644 --- a/cpp/daal/src/services/service_topo.h +++ b/cpp/daal/src/services/service_topo.h @@ -111,7 +111,7 @@ constexpr LNX_PTR2INT LNX_MY1CON = 1LL; #endif #else /* WINDOWS */ - + #define NOMINMAX #include <windows.h> #ifdef _M_IA64 @@ -337,17 +337,17 @@ struct glktsn ~glktsn() { FreeArrays(); } }; -static unsigned long __internal_daal_getBitsFromDWORD(const unsigned int val, const char from, const char to); -static unsigned __internal_daal_createMask(unsigned numEntries, unsigned * maskLength); -static unsigned __internal_daal_slectOrdfromPkg(unsigned package, unsigned core, unsigned logical); -static unsigned __internal_daal_getAPICID(unsigned processor); -static void __internal_daal_initCpuTopology(); -static int __internal_daal_bindContext(unsigned cpu, void * prevAffinity); -static void __internal_daal_restoreContext(void * prevAffinity); -static void __internal_daal_setChkProcessAffinityConsistency(unsigned lcl_OSProcessorCount); -static void __internal_daal_setGenericAffinityBit(GenericAffinityMask * pAffinityMap, unsigned cpu); -static void __internal_daal_getCpuidInfo(CPUIDinfo * info, const unsigned int func, 
const unsigned int subfunc); -static int __internal_daal_countBits(DWORD_PTR x); +[[maybe_unused]] static unsigned long __internal_daal_getBitsFromDWORD(const unsigned int val, const char from, const char to); +[[maybe_unused]] static unsigned __internal_daal_createMask(unsigned numEntries, unsigned * maskLength); +[[maybe_unused]] static unsigned __internal_daal_slectOrdfromPkg(unsigned package, unsigned core, unsigned logical); +[[maybe_unused]] static unsigned __internal_daal_getAPICID(unsigned processor); +[[maybe_unused]] static void __internal_daal_initCpuTopology(); +[[maybe_unused]] static int __internal_daal_bindContext(unsigned cpu, void * prevAffinity); +[[maybe_unused]] static void __internal_daal_restoreContext(void * prevAffinity); +[[maybe_unused]] static void __internal_daal_setChkProcessAffinityConsistency(unsigned lcl_OSProcessorCount); +[[maybe_unused]] static void __internal_daal_setGenericAffinityBit(GenericAffinityMask * pAffinityMap, unsigned cpu); +[[maybe_unused]] static void __internal_daal_getCpuidInfo(CPUIDinfo * info, const unsigned int func, const unsigned int subfunc); +[[maybe_unused]] static int __internal_daal_countBits(DWORD_PTR x); unsigned _internal_daal_GetMaxCPUSupportedByOS(); unsigned _internal_daal_GetOSLogicalProcessorCount(); diff --git a/cpp/daal/src/threading/export.def b/cpp/daal/src/threading/export.def index 4ee8912077a..7962b0a8844 100644 --- a/cpp/daal/src/threading/export.def +++ b/cpp/daal/src/threading/export.def @@ -15,266 +15,3 @@ ;=============================================================================== EXPORTS -fpk_blas_avx2_daxpy -fpk_blas_avx2_dgemm -fpk_blas_avx2_dgemv -fpk_blas_avx2_dsymm -fpk_blas_avx2_dsyr -fpk_blas_avx2_dsyrk -fpk_blas_avx2_saxpy -fpk_blas_avx2_sgemm -fpk_blas_avx2_sgemv -fpk_blas_avx2_ssymm -fpk_blas_avx2_ssyr -fpk_blas_avx2_ssyrk -fpk_blas_avx2_xdgemm -fpk_blas_avx2_xdsymm -fpk_blas_avx2_xdsyr -fpk_blas_avx2_xdsyrk -fpk_blas_avx2_xsgemm -fpk_blas_avx2_xssymm 
-fpk_blas_avx2_xssyr -fpk_blas_avx2_xssyrk -fpk_blas_avx2_xsdot -fpk_blas_avx2_xddot -fpk_blas_avx512_daxpy -fpk_blas_avx512_dgemm -fpk_blas_avx512_dgemv -fpk_blas_avx512_dsymm -fpk_blas_avx512_dsyr -fpk_blas_avx512_dsyrk -fpk_blas_avx512_saxpy -fpk_blas_avx512_sgemm -fpk_blas_avx512_sgemv -fpk_blas_avx512_ssymm -fpk_blas_avx512_ssyr -fpk_blas_avx512_ssyrk -fpk_blas_avx512_xdgemm -fpk_blas_avx512_xdsymm -fpk_blas_avx512_xdsyr -fpk_blas_avx512_xdsyrk -fpk_blas_avx512_xsgemm -fpk_blas_avx512_xssymm -fpk_blas_avx512_xssyr -fpk_blas_avx512_xssyrk -fpk_blas_avx512_xsdot -fpk_blas_avx512_xddot -fpk_blas_sse2_daxpy -fpk_blas_sse2_dgemm -fpk_blas_sse2_dgemv -fpk_blas_sse2_dsymm -fpk_blas_sse2_dsyr -fpk_blas_sse2_dsyrk -fpk_blas_sse2_saxpy -fpk_blas_sse2_sgemm -fpk_blas_sse2_sgemv -fpk_blas_sse2_ssymm -fpk_blas_sse2_ssyr -fpk_blas_sse2_ssyrk -fpk_blas_sse2_xdgemm -fpk_blas_sse2_xdsymm -fpk_blas_sse2_xdsyr -fpk_blas_sse2_xdsyrk -fpk_blas_sse2_xsgemm -fpk_blas_sse2_xssymm -fpk_blas_sse2_xssyr -fpk_blas_sse2_xssyrk -fpk_blas_sse2_xsdot -fpk_blas_sse2_xddot -fpk_blas_sse42_daxpy -fpk_blas_sse42_dgemm -fpk_blas_sse42_dgemv -fpk_blas_sse42_dsymm -fpk_blas_sse42_dsyr -fpk_blas_sse42_dsyrk -fpk_blas_sse42_saxpy -fpk_blas_sse42_sgemm -fpk_blas_sse42_sgemv -fpk_blas_sse42_ssymm -fpk_blas_sse42_ssyr -fpk_blas_sse42_ssyrk -fpk_blas_sse42_xdgemm -fpk_blas_sse42_xdsymm -fpk_blas_sse42_xdsyr -fpk_blas_sse42_xdsyrk -fpk_blas_sse42_xsgemm -fpk_blas_sse42_xssymm -fpk_blas_sse42_xssyr -fpk_blas_sse42_xssyrk -fpk_blas_sse42_xsdot -fpk_blas_sse42_xddot -fpk_dft_avx2_ippsSortRadixAscend_32f_I -fpk_dft_avx2_ippsSortRadixAscend_64f_I -fpk_dft_avx512_ippsSortRadixAscend_32f_I -fpk_dft_avx512_ippsSortRadixAscend_64f_I -fpk_dft_sse2_ippsSortRadixAscend_32f_I -fpk_dft_sse2_ippsSortRadixAscend_64f_I -fpk_dft_sse42_ippsSortRadixAscend_32f_I -fpk_dft_sse42_ippsSortRadixAscend_64f_I -fpk_lapack_avx2_dgeqp3 -fpk_lapack_avx2_dgeqrf -fpk_lapack_avx2_dgerqf -fpk_lapack_avx2_dgesvd -fpk_lapack_avx2_dorgqr 
-fpk_lapack_avx2_dormqr -fpk_lapack_avx2_dormrq -fpk_lapack_avx2_dpotrf -fpk_lapack_avx2_dpotri -fpk_lapack_avx2_dpotrs -fpk_lapack_avx2_dgetrf -fpk_lapack_avx2_dgetrs -fpk_lapack_avx2_dpptrf -fpk_lapack_avx2_dsyev -fpk_lapack_avx2_dsyevd -fpk_lapack_avx2_dtrtrs -fpk_lapack_avx2_sgeqp3 -fpk_lapack_avx2_sgeqrf -fpk_lapack_avx2_sgerqf -fpk_lapack_avx2_sgesvd -fpk_lapack_avx2_sorgqr -fpk_lapack_avx2_sormqr -fpk_lapack_avx2_sormrq -fpk_lapack_avx2_spotrf -fpk_lapack_avx2_spotri -fpk_lapack_avx2_spotrs -fpk_lapack_avx2_sgetrf -fpk_lapack_avx2_sgetrs -fpk_lapack_avx2_spptrf -fpk_lapack_avx2_ssyev -fpk_lapack_avx2_ssyevd -fpk_lapack_avx2_strtrs -fpk_lapack_avx512_dgeqp3 -fpk_lapack_avx512_dgeqrf -fpk_lapack_avx512_dgerqf -fpk_lapack_avx512_dgesvd -fpk_lapack_avx512_dorgqr -fpk_lapack_avx512_dormqr -fpk_lapack_avx512_dormrq -fpk_lapack_avx512_dpotrf -fpk_lapack_avx512_dpotri -fpk_lapack_avx512_dpotrs -fpk_lapack_avx512_dgetrf -fpk_lapack_avx512_dgetrs -fpk_lapack_avx512_dpptrf -fpk_lapack_avx512_dsyev -fpk_lapack_avx512_dsyevd -fpk_lapack_avx512_dtrtrs -fpk_lapack_avx512_sgeqp3 -fpk_lapack_avx512_sgeqrf -fpk_lapack_avx512_sgerqf -fpk_lapack_avx512_sgesvd -fpk_lapack_avx512_sorgqr -fpk_lapack_avx512_sormqr -fpk_lapack_avx512_sormrq -fpk_lapack_avx512_spotrf -fpk_lapack_avx512_spotri -fpk_lapack_avx512_spotrs -fpk_lapack_avx512_sgetrf -fpk_lapack_avx512_sgetrs -fpk_lapack_avx512_spptrf -fpk_lapack_avx512_ssyev -fpk_lapack_avx512_ssyevd -fpk_lapack_avx512_strtrs -fpk_lapack_sse2_dgeqp3 -fpk_lapack_sse2_dgeqrf -fpk_lapack_sse2_dgerqf -fpk_lapack_sse2_dgesvd -fpk_lapack_sse2_dorgqr -fpk_lapack_sse2_dormqr -fpk_lapack_sse2_dormrq -fpk_lapack_sse2_dpotrf -fpk_lapack_sse2_dpotri -fpk_lapack_sse2_dpotrs -fpk_lapack_sse2_dgetrf -fpk_lapack_sse2_dgetrs -fpk_lapack_sse2_dpptrf -fpk_lapack_sse2_dsyev -fpk_lapack_sse2_dsyevd -fpk_lapack_sse2_dtrtrs -fpk_lapack_sse2_sgeqp3 -fpk_lapack_sse2_sgeqrf -fpk_lapack_sse2_sgerqf -fpk_lapack_sse2_sgesvd -fpk_lapack_sse2_sorgqr 
-fpk_lapack_sse2_sormqr -fpk_lapack_sse2_sormrq -fpk_lapack_sse2_spotrf -fpk_lapack_sse2_spotri -fpk_lapack_sse2_spotrs -fpk_lapack_sse2_sgetrf -fpk_lapack_sse2_sgetrs -fpk_lapack_sse2_spptrf -fpk_lapack_sse2_ssyev -fpk_lapack_sse2_ssyevd -fpk_lapack_sse2_strtrs -fpk_lapack_sse42_dgeqp3 -fpk_lapack_sse42_dgeqrf -fpk_lapack_sse42_dgerqf -fpk_lapack_sse42_dgesvd -fpk_lapack_sse42_dorgqr -fpk_lapack_sse42_dormqr -fpk_lapack_sse42_dormrq -fpk_lapack_sse42_dpotrf -fpk_lapack_sse42_dpotri -fpk_lapack_sse42_dpotrs -fpk_lapack_sse42_dgetrf -fpk_lapack_sse42_dgetrs -fpk_lapack_sse42_dpptrf -fpk_lapack_sse42_dsyev -fpk_lapack_sse42_dsyevd -fpk_lapack_sse42_dtrtrs -fpk_lapack_sse42_sgeqp3 -fpk_lapack_sse42_sgeqrf -fpk_lapack_sse42_sgerqf -fpk_lapack_sse42_sgesvd -fpk_lapack_sse42_sorgqr -fpk_lapack_sse42_sormqr -fpk_lapack_sse42_sormrq -fpk_lapack_sse42_spotrf -fpk_lapack_sse42_spotri -fpk_lapack_sse42_spotrs -fpk_lapack_sse42_sgetrf -fpk_lapack_sse42_sgetrs -fpk_lapack_sse42_spptrf -fpk_lapack_sse42_ssyev -fpk_lapack_sse42_ssyevd -fpk_lapack_sse42_strtrs -fpk_serv_get_max_threads -fpk_serv_set_num_threads -fpk_serv_set_num_threads_local -fpk_serv_get_ncpus -fpk_serv_get_ncorespercpu -fpk_serv_get_ht -fpk_serv_get_nlogicalcores -fpk_spblas_avx2_mkl_dcsrmm -fpk_spblas_avx2_mkl_dcsrmultd -fpk_spblas_avx2_mkl_dcsrmv -fpk_spblas_avx2_mkl_scsrmm -fpk_spblas_avx2_mkl_scsrmultd -fpk_spblas_avx2_mkl_scsrmv -fpk_spblas_avx512_mkl_dcsrmm -fpk_spblas_avx512_mkl_dcsrmultd -fpk_spblas_avx512_mkl_dcsrmv -fpk_spblas_avx512_mkl_scsrmm -fpk_spblas_avx512_mkl_scsrmultd -fpk_spblas_avx512_mkl_scsrmv -fpk_spblas_sse2_mkl_dcsrmm -fpk_spblas_sse2_mkl_dcsrmultd -fpk_spblas_sse2_mkl_dcsrmv -fpk_spblas_sse2_mkl_scsrmm -fpk_spblas_sse2_mkl_scsrmultd -fpk_spblas_sse2_mkl_scsrmv -fpk_spblas_sse42_mkl_dcsrmm -fpk_spblas_sse42_mkl_dcsrmultd -fpk_spblas_sse42_mkl_dcsrmv -fpk_spblas_sse42_mkl_scsrmm -fpk_spblas_sse42_mkl_scsrmultd -fpk_spblas_sse42_mkl_scsrmv -fpk_serv_enable_instructions -fpk_serv_cpuisknm 
-fpk_serv_memmove_s -fpk_vsl_serv_threader_for -fpk_vsl_serv_threader_for_ordered -fpk_vsl_serv_threader_sections -fpk_vsl_serv_threader_ordered -fpk_vsl_serv_threader_get_num_threads_limit diff --git a/cpp/daal/src/threading/export_lnx32e.mkl.def b/cpp/daal/src/threading/export_lnx32e.mkl.def index 5a0da223cda..7962b0a8844 100644 --- a/cpp/daal/src/threading/export_lnx32e.mkl.def +++ b/cpp/daal/src/threading/export_lnx32e.mkl.def @@ -15,266 +15,3 @@ ;=============================================================================== EXPORTS -fpk_blas_avx2_daxpy -fpk_blas_avx2_dgemm -fpk_blas_avx2_dgemv -fpk_blas_avx2_dsymm -fpk_blas_avx2_dsyr -fpk_blas_avx2_dsyrk -fpk_blas_avx2_saxpy -fpk_blas_avx2_sgemm -fpk_blas_avx2_sgemv -fpk_blas_avx2_ssymm -fpk_blas_avx2_ssyr -fpk_blas_avx2_ssyrk -fpk_blas_avx2_xdgemm -fpk_blas_avx2_xdsymm -fpk_blas_avx2_xdsyr -fpk_blas_avx2_xdsyrk -fpk_blas_avx2_xsgemm -fpk_blas_avx2_xssymm -fpk_blas_avx2_xssyr -fpk_blas_avx2_xssyrk -fpk_blas_avx2_xsdot -fpk_blas_avx2_xddot -fpk_blas_avx512_daxpy -fpk_blas_avx512_dgemm -fpk_blas_avx512_dgemv -fpk_blas_avx512_dsymm -fpk_blas_avx512_dsyr -fpk_blas_avx512_dsyrk -fpk_blas_avx512_xsdot -fpk_blas_avx512_xddot -fpk_blas_avx512_saxpy -fpk_blas_avx512_sgemm -fpk_blas_avx512_sgemv -fpk_blas_avx512_ssymm -fpk_blas_avx512_ssyr -fpk_blas_avx512_ssyrk -fpk_blas_avx512_xdgemm -fpk_blas_avx512_xdsymm -fpk_blas_avx512_xdsyr -fpk_blas_avx512_xdsyrk -fpk_blas_avx512_xsgemm -fpk_blas_avx512_xssymm -fpk_blas_avx512_xssyr -fpk_blas_avx512_xssyrk -fpk_blas_sse2_daxpy -fpk_blas_sse2_dgemm -fpk_blas_sse2_dgemv -fpk_blas_sse2_dsymm -fpk_blas_sse2_dsyr -fpk_blas_sse2_dsyrk -fpk_blas_sse2_saxpy -fpk_blas_sse2_sgemm -fpk_blas_sse2_sgemv -fpk_blas_sse2_ssymm -fpk_blas_sse2_ssyr -fpk_blas_sse2_ssyrk -fpk_blas_sse2_xdgemm -fpk_blas_sse2_xdsymm -fpk_blas_sse2_xdsyr -fpk_blas_sse2_xdsyrk -fpk_blas_sse2_xsgemm -fpk_blas_sse2_xssymm -fpk_blas_sse2_xssyr -fpk_blas_sse2_xssyrk -fpk_blas_sse2_xsdot -fpk_blas_sse2_xddot 
-fpk_blas_sse42_daxpy -fpk_blas_sse42_dgemm -fpk_blas_sse42_dgemv -fpk_blas_sse42_dsymm -fpk_blas_sse42_dsyr -fpk_blas_sse42_dsyrk -fpk_blas_sse42_saxpy -fpk_blas_sse42_sgemm -fpk_blas_sse42_sgemv -fpk_blas_sse42_ssymm -fpk_blas_sse42_ssyr -fpk_blas_sse42_ssyrk -fpk_blas_sse42_xdgemm -fpk_blas_sse42_xdsymm -fpk_blas_sse42_xdsyr -fpk_blas_sse42_xdsyrk -fpk_blas_sse42_xsgemm -fpk_blas_sse42_xssymm -fpk_blas_sse42_xssyr -fpk_blas_sse42_xssyrk -fpk_blas_sse42_xsdot -fpk_blas_sse42_xddot -fpk_dft_avx2_ippsSortRadixAscend_32f_I -fpk_dft_avx2_ippsSortRadixAscend_64f_I -fpk_dft_avx512_ippsSortRadixAscend_32f_I -fpk_dft_avx512_ippsSortRadixAscend_64f_I -fpk_dft_sse2_ippsSortRadixAscend_32f_I -fpk_dft_sse2_ippsSortRadixAscend_64f_I -fpk_dft_sse42_ippsSortRadixAscend_32f_I -fpk_dft_sse42_ippsSortRadixAscend_64f_I -fpk_lapack_avx2_dgeqp3 -fpk_lapack_avx2_dgeqrf -fpk_lapack_avx2_dgerqf -fpk_lapack_avx2_dgesvd -fpk_lapack_avx2_dorgqr -fpk_lapack_avx2_dormqr -fpk_lapack_avx2_dormrq -fpk_lapack_avx2_dpotrf -fpk_lapack_avx2_dpotri -fpk_lapack_avx2_dpotrs -fpk_lapack_avx2_dgetrf -fpk_lapack_avx2_dgetrs -fpk_lapack_avx2_dpptrf -fpk_lapack_avx2_dsyev -fpk_lapack_avx2_dsyevd -fpk_lapack_avx2_dtrtrs -fpk_lapack_avx2_sgeqp3 -fpk_lapack_avx2_sgeqrf -fpk_lapack_avx2_sgerqf -fpk_lapack_avx2_sgesvd -fpk_lapack_avx2_sorgqr -fpk_lapack_avx2_sormqr -fpk_lapack_avx2_sormrq -fpk_lapack_avx2_spotrf -fpk_lapack_avx2_spotri -fpk_lapack_avx2_spotrs -fpk_lapack_avx2_sgetrf -fpk_lapack_avx2_sgetrs -fpk_lapack_avx2_spptrf -fpk_lapack_avx2_ssyev -fpk_lapack_avx2_ssyevd -fpk_lapack_avx2_strtrs -fpk_lapack_avx512_dgeqp3 -fpk_lapack_avx512_dgeqrf -fpk_lapack_avx512_dgerqf -fpk_lapack_avx512_dgesvd -fpk_lapack_avx512_dorgqr -fpk_lapack_avx512_dormqr -fpk_lapack_avx512_dormrq -fpk_lapack_avx512_dpotrf -fpk_lapack_avx512_dpotri -fpk_lapack_avx512_dpotrs -fpk_lapack_avx512_dgetrf -fpk_lapack_avx512_dgetrs -fpk_lapack_avx512_dpptrf -fpk_lapack_avx512_dsyev -fpk_lapack_avx512_dsyevd -fpk_lapack_avx512_dtrtrs 
-fpk_lapack_avx512_sgeqp3 -fpk_lapack_avx512_sgeqrf -fpk_lapack_avx512_sgerqf -fpk_lapack_avx512_sgesvd -fpk_lapack_avx512_sorgqr -fpk_lapack_avx512_sormqr -fpk_lapack_avx512_sormrq -fpk_lapack_avx512_spotrf -fpk_lapack_avx512_spotri -fpk_lapack_avx512_spotrs -fpk_lapack_avx512_sgetrf -fpk_lapack_avx512_sgetrs -fpk_lapack_avx512_spptrf -fpk_lapack_avx512_ssyev -fpk_lapack_avx512_ssyevd -fpk_lapack_avx512_strtrs -fpk_lapack_sse2_dgeqp3 -fpk_lapack_sse2_dgeqrf -fpk_lapack_sse2_dgerqf -fpk_lapack_sse2_dgesvd -fpk_lapack_sse2_dorgqr -fpk_lapack_sse2_dormqr -fpk_lapack_sse2_dormrq -fpk_lapack_sse2_dpotrf -fpk_lapack_sse2_dpotri -fpk_lapack_sse2_dpotrs -fpk_lapack_sse2_dgetrf -fpk_lapack_sse2_dgetrs -fpk_lapack_sse2_dpptrf -fpk_lapack_sse2_dsyev -fpk_lapack_sse2_dsyevd -fpk_lapack_sse2_dtrtrs -fpk_lapack_sse2_sgeqp3 -fpk_lapack_sse2_sgeqrf -fpk_lapack_sse2_sgerqf -fpk_lapack_sse2_sgesvd -fpk_lapack_sse2_sorgqr -fpk_lapack_sse2_sormqr -fpk_lapack_sse2_sormrq -fpk_lapack_sse2_spotrf -fpk_lapack_sse2_spotri -fpk_lapack_sse2_spotrs -fpk_lapack_sse2_sgetrf -fpk_lapack_sse2_sgetrs -fpk_lapack_sse2_spptrf -fpk_lapack_sse2_ssyev -fpk_lapack_sse2_ssyevd -fpk_lapack_sse2_strtrs -fpk_lapack_sse42_dgeqp3 -fpk_lapack_sse42_dgeqrf -fpk_lapack_sse42_dgerqf -fpk_lapack_sse42_dgesvd -fpk_lapack_sse42_dorgqr -fpk_lapack_sse42_dormqr -fpk_lapack_sse42_dormrq -fpk_lapack_sse42_dpotrf -fpk_lapack_sse42_dpotri -fpk_lapack_sse42_dpotrs -fpk_lapack_sse42_dgetrf -fpk_lapack_sse42_dgetrs -fpk_lapack_sse42_dpptrf -fpk_lapack_sse42_dsyev -fpk_lapack_sse42_dsyevd -fpk_lapack_sse42_dtrtrs -fpk_lapack_sse42_sgeqp3 -fpk_lapack_sse42_sgeqrf -fpk_lapack_sse42_sgerqf -fpk_lapack_sse42_sgesvd -fpk_lapack_sse42_sorgqr -fpk_lapack_sse42_sormqr -fpk_lapack_sse42_sormrq -fpk_lapack_sse42_spotrf -fpk_lapack_sse42_spotri -fpk_lapack_sse42_spotrs -fpk_lapack_sse42_sgetrf -fpk_lapack_sse42_sgetrs -fpk_lapack_sse42_spptrf -fpk_lapack_sse42_ssyev -fpk_lapack_sse42_ssyevd -fpk_lapack_sse42_strtrs 
-fpk_serv_get_max_threads -fpk_serv_set_num_threads -fpk_serv_set_num_threads_local -fpk_serv_get_ncpus -fpk_serv_get_ncorespercpu -fpk_serv_get_ht -fpk_serv_get_nlogicalcores -fpk_spblas_avx2_mkl_dcsrmm -fpk_spblas_avx2_mkl_dcsrmultd -fpk_spblas_avx2_mkl_dcsrmv -fpk_spblas_avx2_mkl_scsrmm -fpk_spblas_avx2_mkl_scsrmultd -fpk_spblas_avx2_mkl_scsrmv -fpk_spblas_avx512_mkl_dcsrmm -fpk_spblas_avx512_mkl_dcsrmultd -fpk_spblas_avx512_mkl_dcsrmv -fpk_spblas_avx512_mkl_scsrmm -fpk_spblas_avx512_mkl_scsrmultd -fpk_spblas_avx512_mkl_scsrmv -fpk_spblas_sse2_mkl_dcsrmm -fpk_spblas_sse2_mkl_dcsrmultd -fpk_spblas_sse2_mkl_dcsrmv -fpk_spblas_sse2_mkl_scsrmm -fpk_spblas_sse2_mkl_scsrmultd -fpk_spblas_sse2_mkl_scsrmv -fpk_spblas_sse42_mkl_dcsrmm -fpk_spblas_sse42_mkl_dcsrmultd -fpk_spblas_sse42_mkl_dcsrmv -fpk_spblas_sse42_mkl_scsrmm -fpk_spblas_sse42_mkl_scsrmultd -fpk_spblas_sse42_mkl_scsrmv -fpk_serv_enable_instructions -fpk_serv_cpuisknm -fpk_serv_memmove_s -fpk_vsl_serv_threader_for -fpk_vsl_serv_threader_for_ordered -fpk_vsl_serv_threader_sections -fpk_vsl_serv_threader_ordered -fpk_vsl_serv_threader_get_num_threads_limit diff --git a/cpp/daal/src/threading/export_mac.def b/cpp/daal/src/threading/export_mac.def index af053955a02..7962b0a8844 100644 --- a/cpp/daal/src/threading/export_mac.def +++ b/cpp/daal/src/threading/export_mac.def @@ -15,204 +15,3 @@ ;=============================================================================== EXPORTS -_fpk_blas_avx2_daxpy -_fpk_blas_avx2_dgemm -_fpk_blas_avx2_dgemv -_fpk_blas_avx2_dsymm -_fpk_blas_avx2_dsyr -_fpk_blas_avx2_dsyrk -_fpk_blas_avx2_saxpy -_fpk_blas_avx2_sgemm -_fpk_blas_avx2_sgemv -_fpk_blas_avx2_ssymm -_fpk_blas_avx2_ssyr -_fpk_blas_avx2_ssyrk -_fpk_blas_avx2_xdgemm -_fpk_blas_avx2_xdsymm -_fpk_blas_avx2_xdsyr -_fpk_blas_avx2_xdsyrk -_fpk_blas_avx2_xsgemm -_fpk_blas_avx2_xssymm -_fpk_blas_avx2_xssyr -_fpk_blas_avx2_xssyrk -_fpk_blas_avx2_xsdot -_fpk_blas_avx2_xddot -_fpk_blas_avx512_daxpy -_fpk_blas_avx512_dgemm 
-_fpk_blas_avx512_dgemv -_fpk_blas_avx512_dsymm -_fpk_blas_avx512_dsyr -_fpk_blas_avx512_dsyrk -_fpk_blas_avx512_saxpy -_fpk_blas_avx512_sgemm -_fpk_blas_avx512_sgemv -_fpk_blas_avx512_ssymm -_fpk_blas_avx512_ssyr -_fpk_blas_avx512_ssyrk -_fpk_blas_avx512_xdgemm -_fpk_blas_avx512_xdsymm -_fpk_blas_avx512_xdsyr -_fpk_blas_avx512_xdsyrk -_fpk_blas_avx512_xsgemm -_fpk_blas_avx512_xssymm -_fpk_blas_avx512_xssyr -_fpk_blas_avx512_xssyrk -_fpk_blas_avx512_xsdot -_fpk_blas_avx512_xddot -_fpk_blas_avx_daxpy -_fpk_blas_avx_dgemm -_fpk_blas_avx_dgemv -_fpk_blas_avx_dsymm -_fpk_blas_avx_dsyr -_fpk_blas_avx_dsyrk -_fpk_blas_avx_saxpy -_fpk_blas_avx_sgemm -_fpk_blas_avx_sgemv -_fpk_blas_avx_ssymm -_fpk_blas_avx_ssyr -_fpk_blas_avx_ssyrk -_fpk_blas_avx_xdgemm -_fpk_blas_avx_xdsymm -_fpk_blas_avx_xdsyr -_fpk_blas_avx_xdsyrk -_fpk_blas_avx_xsgemm -_fpk_blas_avx_xssymm -_fpk_blas_avx_xssyr -_fpk_blas_avx_xssyrk -_fpk_blas_avx_xsdot -_fpk_blas_avx_xddot -_fpk_dft_avx2_ippsSortRadixAscend_32f_I -_fpk_dft_avx2_ippsSortRadixAscend_64f_I -_fpk_dft_avx512_ippsSortRadixAscend_32f_I -_fpk_dft_avx512_ippsSortRadixAscend_64f_I -_fpk_dft_avx_ippsSortRadixAscend_32f_I -_fpk_dft_avx_ippsSortRadixAscend_64f_I -_fpk_lapack_avx2_dgeqp3 -_fpk_lapack_avx2_dgeqrf -_fpk_lapack_avx2_dgerqf -_fpk_lapack_avx2_dgesvd -_fpk_lapack_avx2_dorgqr -_fpk_lapack_avx2_dormqr -_fpk_lapack_avx2_dormrq -_fpk_lapack_avx2_dpotrf -_fpk_lapack_avx2_dpotri -_fpk_lapack_avx2_dpotrs -_fpk_lapack_avx2_dgetrf -_fpk_lapack_avx2_dgetrs -_fpk_lapack_avx2_dpptrf -_fpk_lapack_avx2_dsyev -_fpk_lapack_avx2_dsyevd -_fpk_lapack_avx2_dtrtrs -_fpk_lapack_avx2_sgeqp3 -_fpk_lapack_avx2_sgeqrf -_fpk_lapack_avx2_sgerqf -_fpk_lapack_avx2_sgesvd -_fpk_lapack_avx2_sorgqr -_fpk_lapack_avx2_sormqr -_fpk_lapack_avx2_sormrq -_fpk_lapack_avx2_spotrf -_fpk_lapack_avx2_spotri -_fpk_lapack_avx2_spotrs -_fpk_lapack_avx2_sgetrf -_fpk_lapack_avx2_sgetrs -_fpk_lapack_avx2_spptrf -_fpk_lapack_avx2_ssyev -_fpk_lapack_avx2_ssyevd -_fpk_lapack_avx2_strtrs 
-_fpk_lapack_avx512_dgeqp3 -_fpk_lapack_avx512_dgeqrf -_fpk_lapack_avx512_dgerqf -_fpk_lapack_avx512_dgesvd -_fpk_lapack_avx512_dorgqr -_fpk_lapack_avx512_dormqr -_fpk_lapack_avx512_dormrq -_fpk_lapack_avx512_dpotrf -_fpk_lapack_avx512_dpotri -_fpk_lapack_avx512_dpotrs -_fpk_lapack_avx512_dgetrf -_fpk_lapack_avx512_dgetrs -_fpk_lapack_avx512_dpptrf -_fpk_lapack_avx512_dsyev -_fpk_lapack_avx512_dsyevd -_fpk_lapack_avx512_dtrtrs -_fpk_lapack_avx512_sgeqp3 -_fpk_lapack_avx512_sgeqrf -_fpk_lapack_avx512_sgerqf -_fpk_lapack_avx512_sgesvd -_fpk_lapack_avx512_sorgqr -_fpk_lapack_avx512_sormqr -_fpk_lapack_avx512_sormrq -_fpk_lapack_avx512_spotrf -_fpk_lapack_avx512_spotri -_fpk_lapack_avx512_spotrs -_fpk_lapack_avx512_sgetrf -_fpk_lapack_avx512_sgetrs -_fpk_lapack_avx512_spptrf -_fpk_lapack_avx512_ssyev -_fpk_lapack_avx512_ssyevd -_fpk_lapack_avx512_strtrs -_fpk_lapack_avx_dgeqp3 -_fpk_lapack_avx_dgeqrf -_fpk_lapack_avx_dgerqf -_fpk_lapack_avx_dgesvd -_fpk_lapack_avx_dorgqr -_fpk_lapack_avx_dormqr -_fpk_lapack_avx_dormrq -_fpk_lapack_avx_dpotrf -_fpk_lapack_avx_dpotri -_fpk_lapack_avx_dpotrs -_fpk_lapack_avx_dgetrf -_fpk_lapack_avx_dgetrs -_fpk_lapack_avx_dpptrf -_fpk_lapack_avx_dsyev -_fpk_lapack_avx_dsyevd -_fpk_lapack_avx_dtrtrs -_fpk_lapack_avx_sgeqp3 -_fpk_lapack_avx_sgeqrf -_fpk_lapack_avx_sgerqf -_fpk_lapack_avx_sgesvd -_fpk_lapack_avx_sorgqr -_fpk_lapack_avx_sormqr -_fpk_lapack_avx_sormrq -_fpk_lapack_avx_spotrf -_fpk_lapack_avx_spotri -_fpk_lapack_avx_spotrs -_fpk_lapack_avx_sgetrf -_fpk_lapack_avx_sgetrs -_fpk_lapack_avx_spptrf -_fpk_lapack_avx_ssyev -_fpk_lapack_avx_ssyevd -_fpk_lapack_avx_strtrs -_fpk_serv_get_max_threads -_fpk_serv_set_num_threads -_fpk_serv_set_num_threads_local -_fpk_serv_get_ncpus -_fpk_serv_get_ncorespercpu -_fpk_serv_get_ht -_fpk_serv_get_nlogicalcores -_fpk_spblas_avx2_mkl_dcsrmm -_fpk_spblas_avx2_mkl_dcsrmultd -_fpk_spblas_avx2_mkl_dcsrmv -_fpk_spblas_avx2_mkl_scsrmm -_fpk_spblas_avx2_mkl_scsrmultd -_fpk_spblas_avx2_mkl_scsrmv 
-_fpk_spblas_avx512_mkl_dcsrmm -_fpk_spblas_avx512_mkl_dcsrmultd -_fpk_spblas_avx512_mkl_dcsrmv -_fpk_spblas_avx512_mkl_scsrmm -_fpk_spblas_avx512_mkl_scsrmultd -_fpk_spblas_avx512_mkl_scsrmv -_fpk_spblas_avx_mkl_dcsrmm -_fpk_spblas_avx_mkl_dcsrmultd -_fpk_spblas_avx_mkl_dcsrmv -_fpk_spblas_avx_mkl_scsrmm -_fpk_spblas_avx_mkl_scsrmultd -_fpk_spblas_avx_mkl_scsrmv -_fpk_serv_enable_instructions -_fpk_serv_cpuisknm -_fpk_serv_memmove_s -_fpk_vsl_serv_threader_for -_fpk_vsl_serv_threader_for_ordered -_fpk_vsl_serv_threader_sections -_fpk_vsl_serv_threader_ordered -_fpk_vsl_serv_threader_get_num_threads_limit diff --git a/cpp/oneapi/dal/BUILD b/cpp/oneapi/dal/BUILD index ff6d770cc7c..7a3ba863105 100644 --- a/cpp/oneapi/dal/BUILD +++ b/cpp/oneapi/dal/BUILD @@ -30,7 +30,7 @@ dal_module( "@onedal//cpp/daal:data_management", ], dpc_deps = [ - "@micromkl_dpc//:mkl_dpc", + "@mkl//:mkl_dpc", ], ) @@ -53,7 +53,6 @@ dal_collect_modules( modules = [ "algo", "io", - "backend/micromkl", "backend/primitives", ], ) @@ -175,7 +174,6 @@ dal_collect_test_suites( "io", "table", "util", - "backend/micromkl", "backend/primitives", ], tests = [ diff --git a/cpp/oneapi/dal/algo/pca/backend/gpu/finalize_train_kernel_cov_impl_dpc.cpp b/cpp/oneapi/dal/algo/pca/backend/gpu/finalize_train_kernel_cov_impl_dpc.cpp index 12862ab04ba..17d504b804e 100644 --- a/cpp/oneapi/dal/algo/pca/backend/gpu/finalize_train_kernel_cov_impl_dpc.cpp +++ b/cpp/oneapi/dal/algo/pca/backend/gpu/finalize_train_kernel_cov_impl_dpc.cpp @@ -113,18 +113,22 @@ result_t finalize_train_kernel_cov_impl::operator()(const descriptor_t& d data_to_compute = corr; } - auto [eigvecs, eigvals] = compute_eigenvectors_on_host(q, - std::move(data_to_compute), - component_count, - { corr_event, vars_event, cov_event }); + auto [eigvals, syevd_event] = + syevd_computation(q, data_to_compute, { cov_event, corr_event, vars_event }); + + auto flipped_eigvals_host = flip_eigenvalues(q, eigvals, component_count, { syevd_event }); + + auto 
flipped_eigenvectors_host = + flip_eigenvectors(q, data_to_compute, component_count, { syevd_event }); if (desc.get_result_options().test(result_options::eigenvalues)) { - result.set_eigenvalues(homogen_table::wrap(eigvals.flatten(), 1, component_count)); + result.set_eigenvalues( + homogen_table::wrap(flipped_eigvals_host.flatten(), 1, component_count)); } if (desc.get_result_options().test(result_options::singular_values)) { auto singular_values = compute_singular_values_on_host(q, - eigvals, + flipped_eigvals_host, rows_count_global, { corr_event, vars_event, cov_event }); result.set_singular_values( @@ -135,7 +139,7 @@ result_t finalize_train_kernel_cov_impl::operator()(const descriptor_t& d auto vars_host = vars.to_host(q); auto explained_variances_ratio = compute_explained_variances_on_host(q, - eigvals, + flipped_eigvals_host, vars_host, { corr_event, vars_event, cov_event }); result.set_explained_variances_ratio( @@ -143,12 +147,13 @@ result_t finalize_train_kernel_cov_impl::operator()(const descriptor_t& d } if (desc.get_deterministic()) { - sign_flip(eigvecs); + sign_flip(flipped_eigenvectors_host); } if (desc.get_result_options().test(result_options::eigenvectors)) { - result.set_eigenvectors( - homogen_table::wrap(eigvecs.flatten(), component_count, column_count)); + result.set_eigenvectors(homogen_table::wrap(flipped_eigenvectors_host.flatten(), + component_count, + column_count)); } return result; diff --git a/cpp/oneapi/dal/algo/pca/backend/gpu/misc.hpp b/cpp/oneapi/dal/algo/pca/backend/gpu/misc.hpp index d86ee3a04be..8df5d89d985 100644 --- a/cpp/oneapi/dal/algo/pca/backend/gpu/misc.hpp +++ b/cpp/oneapi/dal/algo/pca/backend/gpu/misc.hpp @@ -57,6 +57,113 @@ auto compute_sums(sycl::queue& queue, return std::make_tuple(sums, sums_event); } +/// A wrapper that computes 1d array of eigenvalues and 2d array of eigenvectors from the covariance matrix +/// +/// @tparam Float Floating-point type used to perform computations +/// +/// @param[in] queue The SYCL 
queue +/// @param[in] corr The input covariance/correlation matrix of size `column_count` x `column_count` +/// @param[in] deps Events indicating availability of the `data` for reading or writing +/// +/// @return A tuple of two elements, where the first element is the resulting 2d array of eigenvectors +/// of size `component_count` x `column_count` and the second element is the resulting 1d array of eigenvalues +template +auto syevd_computation(sycl::queue& queue, + pr::ndview& corr, + const bk::event_vector& deps = {}) { + sycl::event::wait_and_throw(deps); + + const std::int64_t column_count = corr.get_dimension(1); + auto eigenvalues = pr::ndarray::empty(queue, { column_count }, alloc::device); + + std::int64_t lda = column_count; + + sycl::event syevd_event; + { + syevd_event = pr::syevd(queue, + column_count, + corr, + lda, + eigenvalues, + { deps }); + } + syevd_event.wait_and_throw(); + return std::make_tuple(eigenvalues, syevd_event); +} + +/// A wrapper that flips 2d array of eigenvectors from the syevd result in necessary order +/// +/// @tparam Float Floating-point type used to perform computations +/// +/// @param[in] queue The SYCL queue +/// @param[in] data The input eigenvectors in ascending order of size `column_count` x `column_count` +/// @param[in] component_count The number of `component_count` of the descriptor +/// @param[in] deps Events indicating availability of the `data` for reading or writing +/// +/// @return The resulting 2d array of eigenvectors +template +auto flip_eigenvectors(sycl::queue& queue, + pr::ndview& data, + std::int64_t component_count, + const bk::event_vector& deps = {}) { + const std::int64_t column_count = data.get_dimension(1); + const std::int64_t row_count = data.get_dimension(0); + auto data_ptr = data.get_data(); + auto eigenvectors = + pr::ndarray::empty(queue, { component_count, column_count }, alloc::device); + auto eigenvectors_ptr = eigenvectors.get_mutable_data(); + auto flip_event = 
queue.submit([&](sycl::handler& h) { + const auto range = bk::make_range_2d(component_count, column_count); + h.depends_on(deps); + h.parallel_for(range, [=](sycl::id<2> id) { + const std::int64_t row = id[0]; + const std::int64_t column = id[1]; + eigenvectors_ptr[row * column_count + column] = + data_ptr[(row_count - 1 - row) * column_count + column]; + }); + }); + + flip_event.wait_and_throw(); + auto flipped_eigenvectors_host = eigenvectors.to_host(queue); + + return flipped_eigenvectors_host; +} + +/// A wrapper that flips 1d array of eigenvalues from syevd result in descending order +/// +/// @tparam Float Floating-point type used to perform computations +/// +/// @param[in] queue The SYCL queue +/// @param[in] eigenvalues The input eigenvalues in ascending order of size `column_count` +/// @param[in] component_count The number of `component_count` of the descriptor +/// @param[in] deps Events indicating availability of the `data` for reading or writing +/// +/// @return The resulting 1d array of eigenvalues +template +auto flip_eigenvalues(sycl::queue& queue, + pr::ndview& eigenvalues, + std::int64_t component_count, + const bk::event_vector& deps = {}) { + auto column_count = eigenvalues.get_dimension(0); + auto data_ptr = eigenvalues.get_data(); + auto flipped_eigenvalues = + pr::ndarray::empty(queue, { component_count }, alloc::device); + auto flipped_eigenvalues_ptr = flipped_eigenvalues.get_mutable_data(); + auto flip_event = queue.submit([&](sycl::handler& h) { + const auto range = bk::make_range_1d(component_count); + h.depends_on(deps); + h.parallel_for(range, [=](sycl::id<1> id) { + const std::int64_t col = id[0]; + flipped_eigenvalues_ptr[col] = data_ptr[(column_count - 1) - col]; + }); + }); + + flip_event.wait_and_throw(); + auto flipped_eigenvalues_host = flipped_eigenvalues.to_host(queue); + + return flipped_eigenvalues_host; +} + /// A wrapper that computes 1d array of means of the columns from precomputed sums /// /// @tparam Float 
Floating-point type used to perform computations @@ -290,36 +397,6 @@ auto compute_correlation_from_covariance(sycl::queue& queue, // SVD method -/// A wrapper that computes 1d array of eigenvalues and 2d array of eigenvectors from the covariance matrix -/// -/// @tparam Float Floating-point type used to perform computations -/// -/// @param[in] queue The SYCL queue -/// @param[in] corr The input covariance/correlation matrix of size `column_count` x `column_count` -/// @param[in] component_count The number of `component_count` of the descriptor -/// @param[in] deps Events indicating availability of the `data` for reading or writing -/// -/// @return A tuple of two elements, where the first element is the resulting 2d array of eigenvectors -/// of size `component_count` x `column_count` and the second element is the resulting 1d array of eigenvalues -template -auto compute_eigenvectors_on_host(sycl::queue& queue, - pr::ndarray&& corr, - std::int64_t component_count, - const dal::backend::event_vector& deps = {}) { - ONEDAL_PROFILER_TASK(compute_eigenvectors_on_host); - ONEDAL_ASSERT(corr.get_dimension(0) == corr.get_dimension(1), - "Correlation matrix must be square"); - ONEDAL_ASSERT(corr.get_dimension(0) > 0); - const std::int64_t column_count = corr.get_dimension(0); - - auto eigvecs = pr::ndarray::empty({ component_count, column_count }); - auto eigvals = pr::ndarray::empty(component_count); - auto host_corr = corr.to_host(queue, deps); - pr::sym_eigvals_descending(host_corr, component_count, eigvecs, eigvals); - - return std::make_tuple(eigvecs, eigvals); -} - /// A wrapper that computes 1d array of eigenvalues from the 1d array of the singular values /// /// @tparam Float Floating-point type used to perform computations diff --git a/cpp/oneapi/dal/algo/pca/backend/gpu/train_kernel_cov_impl_dpc.cpp b/cpp/oneapi/dal/algo/pca/backend/gpu/train_kernel_cov_impl_dpc.cpp index feaa810230f..da397a273a5 100644 --- 
a/cpp/oneapi/dal/algo/pca/backend/gpu/train_kernel_cov_impl_dpc.cpp +++ b/cpp/oneapi/dal/algo/pca/backend/gpu/train_kernel_cov_impl_dpc.cpp @@ -70,7 +70,7 @@ result_t train_kernel_cov_impl::operator()(const descriptor_t& desc, cons const auto data_nd = pr::table2ndarray(q_, data, alloc::device); auto [sums, sums_event] = compute_sums(q_, data_nd); - + sums_event.wait_and_throw(); { ONEDAL_PROFILER_TASK(allreduce_sums, q_); comm_.allreduce(sums.flatten(q_, { sums_event }), spmd::reduce_op::sum).wait(); @@ -97,12 +97,13 @@ result_t train_kernel_cov_impl::operator()(const descriptor_t& desc, cons sycl::event means_event; if (desc.get_result_options().test(result_options::means)) { auto [means, means_event] = compute_means(q_, sums, rows_count_global, { gemm_event }); + means_event.wait_and_throw(); result.set_means(homogen_table::wrap(means.flatten(q_, { means_event }), 1, column_count)); } auto [cov, cov_event] = compute_covariance(q_, rows_count_global, xtx, sums, bias, { gemm_event }); - + cov_event.wait_and_throw(); auto [vars, vars_event] = compute_variances(q_, cov, { cov_event, means_event }); if (desc.get_result_options().test(result_options::vars)) { @@ -110,53 +111,57 @@ result_t train_kernel_cov_impl::operator()(const descriptor_t& desc, cons homogen_table::wrap(vars.flatten(q_, { vars_event }), 1, column_count)); } - auto data_to_compute = cov; + auto eigenvectors = cov; sycl::event corr_event; if (desc.get_normalization_mode() == normalization::zscore) { auto corr = pr::ndarray::empty(q_, { column_count, column_count }, alloc::device); corr_event = pr::correlation_from_covariance(q_, rows_count_global, cov, corr, bias, { cov_event }); - data_to_compute = corr; + eigenvectors = corr; + corr_event.wait_and_throw(); } - auto [eigvecs, eigvals] = compute_eigenvectors_on_host(q_, - std::move(data_to_compute), - component_count, - { cov_event, corr_event, vars_event }); + auto [eigvals, syevd_event] = + syevd_computation(q_, eigenvectors, { cov_event, 
corr_event, vars_event }); + + auto flipped_eigvals_host = flip_eigenvalues(q_, eigvals, component_count, { syevd_event }); if (desc.get_result_options().test(result_options::eigenvalues)) { - result.set_eigenvalues(homogen_table::wrap(eigvals.flatten(), 1, component_count)); + result.set_eigenvalues( + homogen_table::wrap(flipped_eigvals_host.flatten(), 1, component_count)); } + auto flipped_eigenvectors_host = + flip_eigenvectors(q_, eigenvectors, component_count, { syevd_event }); + if (desc.get_result_options().test(result_options::singular_values)) { - auto singular_values = - compute_singular_values_on_host(q_, - eigvals, - rows_count_global, - { cov_event, corr_event, vars_event }); + auto singular_values = compute_singular_values_on_host(q_, + flipped_eigvals_host, + rows_count_global, + { syevd_event }); result.set_singular_values( homogen_table::wrap(singular_values.flatten(), 1, component_count)); } if (desc.get_result_options().test(result_options::explained_variances_ratio)) { auto vars_host = vars.to_host(q_); - auto explained_variances_ratio = - compute_explained_variances_on_host(q_, - eigvals, - vars_host, - { cov_event, corr_event, vars_event }); + auto explained_variances_ratio = compute_explained_variances_on_host(q_, + flipped_eigvals_host, + vars_host, + { syevd_event }); result.set_explained_variances_ratio( homogen_table::wrap(explained_variances_ratio.flatten(), 1, component_count)); } if (desc.get_deterministic()) { - sign_flip(eigvecs); + sign_flip(flipped_eigenvectors_host); } if (desc.get_result_options().test(result_options::eigenvectors)) { - result.set_eigenvectors( - homogen_table::wrap(eigvecs.flatten(), component_count, column_count)); + result.set_eigenvectors(homogen_table::wrap(flipped_eigenvectors_host.flatten(), + flipped_eigenvectors_host.get_dimension(0), + flipped_eigenvectors_host.get_dimension(1))); } return result; diff --git a/cpp/oneapi/dal/algo/pca/backend/gpu/train_kernel_precomputed_impl_dpc.cpp 
b/cpp/oneapi/dal/algo/pca/backend/gpu/train_kernel_precomputed_impl_dpc.cpp index 75970b945f9..a32ffb379a4 100644 --- a/cpp/oneapi/dal/algo/pca/backend/gpu/train_kernel_precomputed_impl_dpc.cpp +++ b/cpp/oneapi/dal/algo/pca/backend/gpu/train_kernel_precomputed_impl_dpc.cpp @@ -65,18 +65,25 @@ result_t train_kernel_precomputed_impl::operator()(const descriptor_t& de } if (desc.get_result_options().test(result_options::eigenvectors | result_options::eigenvalues)) { - auto [eigvecs, eigvals] = - compute_eigenvectors_on_host(q_, std::move(data_nd), component_count); + auto [eigvals, syevd_event] = syevd_computation(q_, data_nd, {}); + + auto flipped_eigvals_host = flip_eigenvalues(q_, eigvals, component_count, { syevd_event }); + + auto flipped_eigenvectors_host = + flip_eigenvectors(q_, data_nd, component_count, { syevd_event }); if (desc.get_result_options().test(result_options::eigenvalues)) { - result.set_eigenvalues(homogen_table::wrap(eigvals.flatten(), 1, component_count)); + result.set_eigenvalues( + homogen_table::wrap(flipped_eigvals_host.flatten(), 1, component_count)); } if (desc.get_deterministic()) { - sign_flip(eigvecs); + sign_flip(flipped_eigenvectors_host); } if (desc.get_result_options().test(result_options::eigenvectors)) { result.set_eigenvectors( - homogen_table::wrap(eigvecs.flatten(), component_count, column_count)); + homogen_table::wrap(flipped_eigenvectors_host.flatten(), + flipped_eigenvectors_host.get_dimension(0), + flipped_eigenvectors_host.get_dimension(1))); } } diff --git a/cpp/oneapi/dal/algo/pca/backend/gpu/train_kernel_svd_impl_dpc.cpp b/cpp/oneapi/dal/algo/pca/backend/gpu/train_kernel_svd_impl_dpc.cpp index d5e6d3f9fbd..87095c5f912 100644 --- a/cpp/oneapi/dal/algo/pca/backend/gpu/train_kernel_svd_impl_dpc.cpp +++ b/cpp/oneapi/dal/algo/pca/backend/gpu/train_kernel_svd_impl_dpc.cpp @@ -33,7 +33,7 @@ namespace oneapi::dal::pca::backend { namespace bk = dal::backend; namespace pr = dal::backend::primitives; -namespace mkl = 
oneapi::fpk; +namespace mkl = oneapi::mkl; using alloc = sycl::usm::alloc; using bk::context_gpu; diff --git a/cpp/oneapi/dal/backend/micromkl/BUILD b/cpp/oneapi/dal/backend/micromkl/BUILD deleted file mode 100644 index 52a5e4bd86b..00000000000 --- a/cpp/oneapi/dal/backend/micromkl/BUILD +++ /dev/null @@ -1,20 +0,0 @@ -package(default_visibility = ["//visibility:public"]) -load("@onedal//dev/bazel:dal.bzl", - "dal_module", - "dal_test_suite", -) - -dal_module( - name = "micromkl", - auto = True, - dal_deps = [ - "@onedal//cpp/oneapi/dal:common", - ], -) - -dal_test_suite( - name = "tests", - framework = "catch2", - private = True, - dal_deps = [], -) diff --git a/cpp/oneapi/dal/backend/micromkl/macro.hpp b/cpp/oneapi/dal/backend/micromkl/macro.hpp deleted file mode 100644 index 35b24d3e701..00000000000 --- a/cpp/oneapi/dal/backend/micromkl/macro.hpp +++ /dev/null @@ -1,168 +0,0 @@ -/******************************************************************************* -* Copyright 2021 Intel Corporation -* Copyright contributors to the oneDAL project -* -* Licensed under the Apache License, Version 2.0 (the "License"); -* you may not use this file except in compliance with the License. -* You may obtain a copy of the License at -* -* http://www.apache.org/licenses/LICENSE-2.0 -* -* Unless required by applicable law or agreed to in writing, software -* distributed under the License is distributed on an "AS IS" BASIS, -* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -* See the License for the specific language governing permissions and -* limitations under the License. -*******************************************************************************/ - -#pragma once - -#include - -#ifndef __MICROMKL_INCLUDE_GUARD__ -#error "This header cannot be included outside of micromkl module" -#endif - -#define STRINGIFY(x) #x -#define DAL_EXPAND(...) 
__VA_ARGS__ - -#ifdef ONEDAL_REF -#define FUNC_NAME(prefix, name) name -#define FUNC_NAME_CPU(cpu, prefix, name) name -#else -#define FUNC_NAME(prefix, name) prefix##_##name -#define FUNC_NAME_CPU(cpu, prefix, name) prefix##_##cpu##_##name -#endif - -#define DISPATCH_ID_NAME(cpu) oneapi::dal::backend::cpu_dispatch_##cpu - -#define FUNC_CPU_DECL(cpu, prefix, name, argdecl) \ - extern "C" void FUNC_NAME_CPU(cpu, prefix, name) argdecl; - -#define DISPATCH_FUNC_DECL(prefix, name, arcdecl) \ - template \ - ONEDAL_FORCEINLINE void FUNC_NAME(prefix, name) arcdecl; - -#define DISPATCH_FUNC_CPU(nominal_cpu, actual_cpu, prefix, name, arcdecl, argcall) \ - template <> \ - ONEDAL_FORCEINLINE void FUNC_NAME(prefix, name) arcdecl { \ - FUNC_NAME_CPU(actual_cpu, prefix, name) argcall; \ - } - -#define FUNC_CPU(nominal_cpu, actual_cpu, prefix, name, argdecl, argcall) \ - FUNC_CPU_DECL(nominal_cpu, prefix, name, argdecl) \ - DISPATCH_FUNC_CPU(nominal_cpu, actual_cpu, prefix, name, argdecl, argcall) - -#if defined(TARGET_X86_64) -#define FUNC_AVX512(...) DAL_EXPAND(FUNC_CPU(avx512, avx512, __VA_ARGS__)) -#define FUNC_AVX2(...) DAL_EXPAND(FUNC_CPU(avx2, avx2, __VA_ARGS__)) -#elif defined(TARGET_ARM) -#define FUNC_A8SVE(...) DAL_EXPAND(FUNC_CPU(sve, sve, __VA_ARGS__)) -#elif defined(TARGET_RISCV64) -#define FUNC_RV64(...) DAL_EXPAND(FUNC_CPU(rv64, rv64, __VA_ARGS__)) -#endif - -#ifdef __APPLE__ -#define FUNC_SSE42(...) DAL_EXPAND(FUNC_CPU(sse42, avx2, __VA_ARGS__)) -#define FUNC_SSE2(...) DAL_EXPAND(FUNC_CPU(sse2, avx2, __VA_ARGS__)) -#else -#define FUNC_SSE42(...) DAL_EXPAND(FUNC_CPU(sse42, sse42, __VA_ARGS__)) -#define FUNC_SSE2(...) 
DAL_EXPAND(FUNC_CPU(sse2, sse2, __VA_ARGS__)) -#endif - -#if defined(TARGET_X86_64) -#define FUNC(prefix, name, argdecl, argcall) \ - DISPATCH_FUNC_DECL(prefix, name, argdecl) \ - FUNC_AVX512(prefix, name, argdecl, argcall) \ - FUNC_AVX2(prefix, name, argdecl, argcall) \ - FUNC_SSE42(prefix, name, argdecl, argcall) \ - FUNC_SSE2(prefix, name, argdecl, argcall) -#elif defined(TARGET_ARM) -#define FUNC(prefix, name, argdecl, argcall) \ - DISPATCH_FUNC_DECL(prefix, name, argdecl) \ - FUNC_A8SVE(prefix, name, argdecl, argcall) -#elif defined(TARGET_RISCV64) -#define FUNC(prefix, name, argdecl, argcall) \ - DISPATCH_FUNC_DECL(prefix, name, argdecl) \ - FUNC_RV64(prefix, name, argdecl, argcall) -#endif - -#ifdef ONEDAL_REF -#define FUNC_DECL(prefix, floatabr, name, argdecl, argcall) \ - FUNC(prefix, floatabr##name##_, argdecl, argcall) - -#define FUNC_CALL(prefix, floatabr, name, cargcall) floatabr##name##_ cargcall; -#else -#define FUNC_DECL(prefix, floatabr, name, argdecl, argcall) \ - FUNC(prefix, floatabr##name, argdecl, argcall) - -#define FUNC_CALL(prefix, floatabr, name, cargcall) prefix##_##floatabr##name cargcall; -#endif - -#define INSTANTIATE_CPU(cpu, name, Float, argdecl) \ - template void name argdecl(Float); - -#ifdef ONEDAL_CPU_DISPATCH_A8SVE -#define INSTANTIATE_A8SVE(...) DAL_EXPAND(INSTANTIATE_CPU(sve, __VA_ARGS__)) -#else -#define INSTANTIATE_A8SVE(...) -#endif - -#ifdef ONEDAL_CPU_DISPATCH_AVX512 -#define INSTANTIATE_AVX512(...) DAL_EXPAND(INSTANTIATE_CPU(avx512, __VA_ARGS__)) -#else -#define INSTANTIATE_AVX512(...) -#endif - -#ifdef ONEDAL_CPU_DISPATCH_AVX2 -#define INSTANTIATE_AVX2(...) DAL_EXPAND(INSTANTIATE_CPU(avx2, __VA_ARGS__)) -#else -#define INSTANTIATE_AVX2(...) -#endif - -#ifdef ONEDAL_CPU_DISPATCH_SSE42 -#define INSTANTIATE_SSE42(...) DAL_EXPAND(INSTANTIATE_CPU(sse42, __VA_ARGS__)) -#else -#define INSTANTIATE_SSE42(...) -#endif - -#ifdef ONEDAL_CPU_DISPATCH_RV64 -#define INSTANTIATE_RV64(...) 
DAL_EXPAND(INSTANTIATE_CPU(rv64, __VA_ARGS__)) -#else -#define INSTANTIATE_RV64(...) -#endif - -#define INSTANTIATE_SSE2(...) DAL_EXPAND(INSTANTIATE_CPU(sse2, __VA_ARGS__)) - -#if defined(TARGET_X86_64) -#define INSTANTIATE_FLOAT(name, Float, argdecl) \ - INSTANTIATE_AVX512(name, Float, argdecl) \ - INSTANTIATE_AVX2(name, Float, argdecl) \ - INSTANTIATE_SSE42(name, Float, argdecl) \ - INSTANTIATE_SSE2(name, Float, argdecl) -#elif defined(TARGET_ARM) -#define INSTANTIATE_FLOAT(name, Float, argdecl) INSTANTIATE_A8SVE(name, Float, argdecl) -#elif defined(TARGET_RISCV64) -#define INSTANTIATE_FLOAT(name, Float, argdecl) INSTANTIATE_RV64(name, Float, argdecl) -#endif - -#define FUNC_TEMPLATE(prefix, name, fargdecl, cargdecl, fargcall, cargcall) \ - FUNC_DECL(prefix, s, name, fargdecl(float), fargcall) \ - FUNC_DECL(prefix, d, name, fargdecl(double), fargcall) \ - \ - namespace oneapi::dal::backend::micromkl { \ - \ - template \ - void name cargdecl(Float) { \ - static_assert(sizeof(std::int64_t) == sizeof(DAAL_INT)); \ - if constexpr (std::is_same_v) { \ - FUNC_CALL(prefix, s, name, cargcall) \ - } \ - else { \ - FUNC_CALL(prefix, d, name, cargcall) \ - } \ - } \ - \ - INSTANTIATE_FLOAT(name, float, cargdecl) \ - INSTANTIATE_FLOAT(name, double, cargdecl) \ - } diff --git a/cpp/oneapi/dal/backend/micromkl/micromkl.cpp b/cpp/oneapi/dal/backend/micromkl/micromkl.cpp deleted file mode 100644 index 442ae288e10..00000000000 --- a/cpp/oneapi/dal/backend/micromkl/micromkl.cpp +++ /dev/null @@ -1,80 +0,0 @@ -/******************************************************************************* -* Copyright 2021 Intel Corporation -* -* Licensed under the Apache License, Version 2.0 (the "License"); -* you may not use this file except in compliance with the License. 
-* You may obtain a copy of the License at -* -* http://www.apache.org/licenses/LICENSE-2.0 -* -* Unless required by applicable law or agreed to in writing, software -* distributed under the License is distributed on an "AS IS" BASIS, -* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -* See the License for the specific language governing permissions and -* limitations under the License. -*******************************************************************************/ - -#include -#include "oneapi/dal/backend/micromkl/micromkl.hpp" -#include "oneapi/dal/backend/dispatcher.hpp" - -#define __MICROMKL_INCLUDE_GUARD__ - -#include "oneapi/dal/backend/micromkl/macro.hpp" - -/* ================================== SYEVD ================================= */ -#define SYEVD_F_DECLARGS(Float) \ - (const char* jobz, \ - const char* uplo, \ - const DAAL_INT* n, \ - Float* a, \ - const DAAL_INT* lda, \ - Float* w, \ - Float* work, \ - const DAAL_INT* lwork, \ - DAAL_INT* iwork, \ - const DAAL_INT* liwork, \ - DAAL_INT* info, \ - int ijobz, \ - int iuplo) - -#define SYEVD_C_DECLARGS(Float) \ - (char jobz, \ - char uplo, \ - std::int64_t n, \ - Float* a, \ - std::int64_t lda, \ - Float* w, \ - Float* work, \ - std::int64_t lwork, \ - std::int64_t* iwork, \ - std::int64_t liwork, \ - std::int64_t& info) - -#define SYEVD_F_CALLARGS (jobz, uplo, n, a, lda, w, work, lwork, iwork, liwork, info, ijobz, iuplo) - -#define SYEVD_C_CALLARGS \ - (&jobz, \ - &uplo, \ - reinterpret_cast(&n), \ - a, \ - reinterpret_cast(&lda), \ - w, \ - work, \ - reinterpret_cast(&lwork), \ - reinterpret_cast(iwork), \ - reinterpret_cast(&liwork), \ - reinterpret_cast(&info), \ - 1, \ - 1) - -#ifdef ONEDAL_REF -FUNC_TEMPLATE(unused, syevd, SYEVD_F_DECLARGS, SYEVD_C_DECLARGS, SYEVD_F_CALLARGS, SYEVD_C_CALLARGS) -#else -FUNC_TEMPLATE(fpk_lapack, - syevd, - SYEVD_F_DECLARGS, - SYEVD_C_DECLARGS, - SYEVD_F_CALLARGS, - SYEVD_C_CALLARGS) -#endif diff --git 
a/cpp/oneapi/dal/backend/primitives/blas/gemm_dpc.cpp b/cpp/oneapi/dal/backend/primitives/blas/gemm_dpc.cpp index 5f00860293d..1cbb5512eb5 100644 --- a/cpp/oneapi/dal/backend/primitives/blas/gemm_dpc.cpp +++ b/cpp/oneapi/dal/backend/primitives/blas/gemm_dpc.cpp @@ -18,7 +18,7 @@ #include "oneapi/dal/backend/primitives/blas/gemm.hpp" #include "oneapi/dal/backend/primitives/blas/misc.hpp" -#include +#include namespace oneapi::dal::backend::primitives { diff --git a/cpp/oneapi/dal/backend/primitives/blas/gemv_dpc.cpp b/cpp/oneapi/dal/backend/primitives/blas/gemv_dpc.cpp index a0cc31d8ff8..d13e51e1e00 100644 --- a/cpp/oneapi/dal/backend/primitives/blas/gemv_dpc.cpp +++ b/cpp/oneapi/dal/backend/primitives/blas/gemv_dpc.cpp @@ -18,7 +18,7 @@ #include "oneapi/dal/backend/primitives/blas/gemv.hpp" #include "oneapi/dal/backend/primitives/blas/misc.hpp" -#include +#include namespace oneapi::dal::backend::primitives { diff --git a/cpp/oneapi/dal/backend/primitives/blas/misc.hpp b/cpp/oneapi/dal/backend/primitives/blas/misc.hpp index 518c59bdf50..64f9b70fece 100644 --- a/cpp/oneapi/dal/backend/primitives/blas/misc.hpp +++ b/cpp/oneapi/dal/backend/primitives/blas/misc.hpp @@ -18,12 +18,13 @@ #include "oneapi/dal/backend/primitives/ndarray.hpp" -#include +#include namespace oneapi::dal::backend::primitives { -namespace mkl = oneapi::fpk; +namespace mkl = oneapi::mkl; +#ifdef ONEDAL_DATA_PARALLEL /// Convert oneDAL `ndorder` to oneMKL `layout` inline constexpr mkl::layout order_as_layout(ndorder order) { return (order == ndorder::c) ? mkl::layout::R /* row-major */ @@ -55,5 +56,5 @@ inline constexpr mkl::uplo ident_uplo(mkl::uplo order) { constexpr auto lower = mkl::uplo::lower; return (order == upper) ? 
upper : lower; } - +#endif } // namespace oneapi::dal::backend::primitives diff --git a/cpp/oneapi/dal/backend/primitives/blas/syrk.hpp b/cpp/oneapi/dal/backend/primitives/blas/syrk.hpp index 7bc219b4b41..c254eddaadd 100644 --- a/cpp/oneapi/dal/backend/primitives/blas/syrk.hpp +++ b/cpp/oneapi/dal/backend/primitives/blas/syrk.hpp @@ -23,7 +23,7 @@ namespace oneapi::dal::backend::primitives { #ifdef ONEDAL_DATA_PARALLEL -namespace mkl = oneapi::fpk; +namespace mkl = oneapi::mkl; template sycl::event syrk(sycl::queue& queue, diff --git a/cpp/oneapi/dal/backend/primitives/blas/syrk_dpc.cpp b/cpp/oneapi/dal/backend/primitives/blas/syrk_dpc.cpp index 6c91531a2c0..dc883a3e77f 100644 --- a/cpp/oneapi/dal/backend/primitives/blas/syrk_dpc.cpp +++ b/cpp/oneapi/dal/backend/primitives/blas/syrk_dpc.cpp @@ -18,7 +18,7 @@ #include "oneapi/dal/backend/primitives/blas/syrk.hpp" #include "oneapi/dal/backend/primitives/blas/misc.hpp" -#include +#include namespace oneapi::dal::backend::primitives { diff --git a/cpp/oneapi/dal/backend/primitives/lapack.hpp b/cpp/oneapi/dal/backend/primitives/lapack.hpp index 8c6fd87e4d9..e5ae59f2a74 100644 --- a/cpp/oneapi/dal/backend/primitives/lapack.hpp +++ b/cpp/oneapi/dal/backend/primitives/lapack.hpp @@ -16,7 +16,7 @@ #pragma once -#include "oneapi/dal/backend/primitives/lapack/eigen.hpp" #include "oneapi/dal/backend/primitives/lapack/solve.hpp" #include "oneapi/dal/backend/primitives/lapack/misc.hpp" #include "oneapi/dal/backend/primitives/lapack/gesvd.hpp" +#include "oneapi/dal/backend/primitives/lapack/syevd.hpp" diff --git a/cpp/oneapi/dal/backend/primitives/lapack/BUILD b/cpp/oneapi/dal/backend/primitives/lapack/BUILD index 799117800f9..fced4d31462 100644 --- a/cpp/oneapi/dal/backend/primitives/lapack/BUILD +++ b/cpp/oneapi/dal/backend/primitives/lapack/BUILD @@ -8,7 +8,6 @@ dal_module( name = "lapack", auto = True, dal_deps = [ - "@onedal//cpp/oneapi/dal/backend/micromkl", "@onedal//cpp/oneapi/dal/backend/primitives:blas", 
"@onedal//cpp/oneapi/dal/backend/primitives:common", ], diff --git a/cpp/oneapi/dal/backend/primitives/lapack/eigen.cpp b/cpp/oneapi/dal/backend/primitives/lapack/eigen.cpp deleted file mode 100644 index 433001b077a..00000000000 --- a/cpp/oneapi/dal/backend/primitives/lapack/eigen.cpp +++ /dev/null @@ -1,87 +0,0 @@ -/******************************************************************************* -* Copyright 2021 Intel Corporation -* -* Licensed under the Apache License, Version 2.0 (the "License"); -* you may not use this file except in compliance with the License. -* You may obtain a copy of the License at -* -* http://www.apache.org/licenses/LICENSE-2.0 -* -* Unless required by applicable law or agreed to in writing, software -* distributed under the License is distributed on an "AS IS" BASIS, -* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -* See the License for the specific language governing permissions and -* limitations under the License. -*******************************************************************************/ - -#include "oneapi/dal/backend/primitives/lapack/eigen.hpp" -#include "oneapi/dal/backend/dispatcher.hpp" -#include "oneapi/dal/backend/micromkl/micromkl.hpp" - -namespace oneapi::dal::backend::primitives { - -template -inline void syevd(Args&&... 
args) { - dispatch_by_cpu(context_cpu{}, [&](auto cpu) { - using dal::backend::micromkl::syevd; - syevd(std::forward(args)...); - }); -} - -template -void sym_eigvals_impl(Float* a, std::int64_t n, std::int64_t lda, Float* w) { - ONEDAL_ASSERT(a); - ONEDAL_ASSERT(w); - ONEDAL_ASSERT(n > 0); - ONEDAL_ASSERT(lda >= n); - - const std::int64_t lwork = 2 * n * n + 6 * n + 1; - const std::int64_t liwork = 5 * n + 3; - - ONEDAL_ASSERT(lwork > n); - ONEDAL_ASSERT(liwork > n); - - const auto work = ndarray::empty(lwork); - const auto iwork = ndarray::empty(liwork); - - Float* work_ptr = work.get_mutable_data(); - std::int64_t* iwork_ptr = iwork.get_mutable_data(); - - std::int64_t info; - syevd('V', 'U', n, a, lda, w, work_ptr, lwork, iwork_ptr, liwork, info); - - if (info != 0) { - throw internal_error{ dal::detail::error_messages::failed_to_compute_eigenvectors() }; - } -} - -template -void flip_eigvals_impl(Float* a, - Float* w, - std::int64_t n, - std::int64_t lda, - std::int64_t w_count, - Float* a_flipped, - std::int64_t lda_flipped, - Float* w_flipped) { - dispatch_by_cpu(context_cpu{}, [&](auto cpu) { - flip_eigvals_impl_cpu(a, - w, - n, - lda, - w_count, - a_flipped, - lda_flipped, - w_flipped); - }); -} - -#define INSTANTIATE(F) \ - template void sym_eigvals_impl(F*, std::int64_t, std::int64_t, F*); \ - template void \ - flip_eigvals_impl(F*, F*, std::int64_t, std::int64_t, std::int64_t, F*, std::int64_t, F*); - -INSTANTIATE(float) -INSTANTIATE(double) - -} // namespace oneapi::dal::backend::primitives diff --git a/cpp/oneapi/dal/backend/primitives/lapack/eigen.hpp b/cpp/oneapi/dal/backend/primitives/lapack/eigen.hpp deleted file mode 100644 index 3bef21dc882..00000000000 --- a/cpp/oneapi/dal/backend/primitives/lapack/eigen.hpp +++ /dev/null @@ -1,122 +0,0 @@ -/******************************************************************************* -* Copyright 2021 Intel Corporation -* -* Licensed under the Apache License, Version 2.0 (the "License"); -* you may not use 
this file except in compliance with the License. -* You may obtain a copy of the License at -* -* http://www.apache.org/licenses/LICENSE-2.0 -* -* Unless required by applicable law or agreed to in writing, software -* distributed under the License is distributed on an "AS IS" BASIS, -* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -* See the License for the specific language governing permissions and -* limitations under the License. -*******************************************************************************/ - -#pragma once - -#include "oneapi/dal/backend/primitives/ndarray.hpp" - -namespace oneapi::dal::backend::primitives { - -/// Do not use this. -template -void sym_eigvals_impl(Float* a, std::int64_t n, std::int64_t lda, Float* w); - -/// Do not use this. -template -void flip_eigvals_impl(Float* a, - Float* w, - std::int64_t n, - std::int64_t lda, - std::int64_t w_count, - Float* a_flipped, - std::int64_t lda_flipped, - Float* w_flipped); - -/// Do not use this. -template -void flip_eigvals_impl_cpu(Float* a, - Float* w, - std::int64_t n, - std::int64_t lda, - std::int64_t w_count, - Float* a_flipped, - std::int64_t lda_flipped, - Float* w_flipped); - -/// Computes eigenvectors and eigenvalues in-place. -/// -/// @param[in, out] data_or_eigvecs The input parameter is interpreted as symmetric matrix of -/// size [n x n]. The computed eigenvectors is written to that -/// matrix. If `order == ndorder::c`, $i$-th row of the matrix -/// contains $i$-th eigenvector. If `order == ndorder::f`, $i$-th -/// column of the matrix contains $i$-th eigenvector. -/// @param[out] eigvals The output array of size [n] that stores computed eigenvalues. -/// The eigenvalues are written in ascending order. $i$-th eigenvalue -/// corrensponds to $i$-th eigenvector. 
-template -inline void sym_eigvals(ndview& data_or_eigvecs, ndview& eigvals) { - ONEDAL_ASSERT(data_or_eigvecs.get_dimension(0) == data_or_eigvecs.get_dimension(1), - "Input matrix must be square"); - ONEDAL_ASSERT(eigvals.get_dimension(0) >= data_or_eigvecs.get_dimension(0)); - ONEDAL_ASSERT(data_or_eigvecs.has_mutable_data()); - ONEDAL_ASSERT(eigvals.has_mutable_data()); - - sym_eigvals_impl(data_or_eigvecs.get_mutable_data(), - data_or_eigvecs.get_dimension(0), - data_or_eigvecs.get_leading_stride(), - eigvals.get_mutable_data()); -} - -/// Computes eigenvectors and eigenvalues in-place. Eigenvectors and eigenvalues are written in -/// descending order determined by eigenvalues. For more details, see `sym_eigvals`. -template -inline void sym_eigvals_descending(ndview& data_or_eigvecs, - ndview& eigvals) { - sym_eigvals(data_or_eigvecs, eigvals); - flip_eigvals_impl(data_or_eigvecs.get_mutable_data(), - eigvals.get_mutable_data(), - data_or_eigvecs.get_dimension(0), - data_or_eigvecs.get_leading_stride(), - data_or_eigvecs.get_dimension(0), - data_or_eigvecs.get_mutable_data(), - data_or_eigvecs.get_leading_stride(), - eigvals.get_mutable_data()); -} - -/// Computes eigenvectors and eigenvalues in-place. `eigval_count` eigenvectors -/// and eigenvalues are written in descending order determined by eigenvalues to -/// `eigvecs` and `eigvals` arrays. -/// -/// @param[in, out] data_or_scratchpad The input parameter is interpreted as symmetric matrix -/// of size [n x n]. The memory is used as a storage for -/// intermediate computations. -/// @param[in] eigval_count The number of eigenvalues and eigenvectors to store to -/// the output buffers. -/// @param[out] eigvecs The output array of size [eigval_count x n] that stores -/// eigenvectors. If `order == ndorder::c`, $i$-th row of the -/// matrix contains $i$-th eigenvector. If `order == ndorder::f`, -/// $i$-th column of the matrix contains $i$-th eigenvector. 
-/// @param[out] eigvals The output array of size [eigval_count] that stores computed -/// eigenvalues. The eigenvalues are written in ascending order. -/// $i$-th eigenvalue corrensponds to $i$-th eigenvector. -template -inline void sym_eigvals_descending(ndview& data_or_scratchpad, - std::int64_t eigval_count, - ndview& eigvecs, - ndview& eigvals) { - auto eigvals_full = ndarray::empty(data_or_scratchpad.get_dimension(0)); - sym_eigvals(data_or_scratchpad, eigvals_full); - flip_eigvals_impl(data_or_scratchpad.get_mutable_data(), - eigvals_full.get_mutable_data(), - data_or_scratchpad.get_dimension(0), - data_or_scratchpad.get_leading_stride(), - eigval_count, - eigvecs.get_mutable_data(), - eigvecs.get_leading_stride(), - eigvals.get_mutable_data()); -} - -} // namespace oneapi::dal::backend::primitives diff --git a/cpp/oneapi/dal/backend/primitives/lapack/eigen_cpu.cpp b/cpp/oneapi/dal/backend/primitives/lapack/eigen_cpu.cpp deleted file mode 100644 index 7b0264c0c1f..00000000000 --- a/cpp/oneapi/dal/backend/primitives/lapack/eigen_cpu.cpp +++ /dev/null @@ -1,94 +0,0 @@ -/******************************************************************************* -* Copyright 2021 Intel Corporation -* -* Licensed under the Apache License, Version 2.0 (the "License"); -* you may not use this file except in compliance with the License. -* You may obtain a copy of the License at -* -* http://www.apache.org/licenses/LICENSE-2.0 -* -* Unless required by applicable law or agreed to in writing, software -* distributed under the License is distributed on an "AS IS" BASIS, -* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -* See the License for the specific language governing permissions and -* limitations under the License. 
-*******************************************************************************/ - -#include "oneapi/dal/backend/dispatcher.hpp" -#include "oneapi/dal/backend/primitives/lapack/eigen.hpp" - -namespace oneapi::dal::backend::primitives { - -template -void flip_eigvals_impl_cpu(Float* a, - Float* w, - std::int64_t n, - std::int64_t lda, - std::int64_t w_count, - Float* a_flipped, - std::int64_t lda_flipped, - Float* w_flipped) { - ONEDAL_ASSERT(a); - ONEDAL_ASSERT(w); - ONEDAL_ASSERT(a_flipped); - ONEDAL_ASSERT(w_flipped); - ONEDAL_ASSERT(n > 0); - ONEDAL_ASSERT(lda >= n); - ONEDAL_ASSERT(w_count > 0); - ONEDAL_ASSERT(w_count <= n); - - if (a == a_flipped) { - ONEDAL_ASSERT(lda == lda_flipped); - - for (std::int64_t i = 0; i < n / 2; i++) { - const std::int64_t src_i = i; - const std::int64_t dst_i = n - i - 1; - for (std::int64_t j = 0; j < n; j++) { - std::swap(a[src_i * lda + j], a[dst_i * lda + j]); - } - } - } - else { - PRAGMA_IVDEP - for (std::int64_t i = 0; i < w_count; i++) { - const std::int64_t src_i = n - i - 1; - const std::int64_t dst_i = i; - for (std::int64_t j = 0; j < n; j++) { - a_flipped[dst_i * lda_flipped + j] = a[src_i * lda + j]; - } - } - } - - if (w == w_flipped) { - ONEDAL_ASSERT(n == w_count); - - for (std::int64_t i = 0; i < n / 2; i++) { - const std::int64_t src_i = i; - const std::int64_t dst_i = n - i - 1; - std::swap(w[src_i], w[dst_i]); - } - } - else { - PRAGMA_IVDEP - for (std::int64_t i = 0; i < w_count; i++) { - const std::int64_t src_i = n - i - 1; - const std::int64_t dst_i = i; - w_flipped[dst_i] = w[src_i]; - } - } -} - -#define INSTANTIATE(Cpu, Float) \ - template void flip_eigvals_impl_cpu(Float*, \ - Float*, \ - std::int64_t, \ - std::int64_t, \ - std::int64_t, \ - Float*, \ - std::int64_t, \ - Float*); - -INSTANTIATE(__CPU_TAG__, float) -INSTANTIATE(__CPU_TAG__, double) - -} // namespace oneapi::dal::backend::primitives diff --git a/cpp/oneapi/dal/backend/primitives/lapack/gesvd.hpp 
b/cpp/oneapi/dal/backend/primitives/lapack/gesvd.hpp index 0aba5f8edf9..311f2a5a3c2 100644 --- a/cpp/oneapi/dal/backend/primitives/lapack/gesvd.hpp +++ b/cpp/oneapi/dal/backend/primitives/lapack/gesvd.hpp @@ -24,7 +24,7 @@ namespace oneapi::dal::backend::primitives { #ifdef ONEDAL_DATA_PARALLEL -namespace mkl = oneapi::fpk; +namespace mkl = oneapi::mkl; template sycl::event gesvd(sycl::queue& queue, diff --git a/cpp/oneapi/dal/backend/primitives/lapack/gesvd_dpc.cpp b/cpp/oneapi/dal/backend/primitives/lapack/gesvd_dpc.cpp index 8bb07ca2ed2..59b02c2191b 100644 --- a/cpp/oneapi/dal/backend/primitives/lapack/gesvd_dpc.cpp +++ b/cpp/oneapi/dal/backend/primitives/lapack/gesvd_dpc.cpp @@ -18,7 +18,7 @@ #include "oneapi/dal/backend/primitives/lapack/gesvd.hpp" #include "oneapi/dal/backend/primitives/blas/misc.hpp" #include "oneapi/dal/backend/primitives/ndarray.hpp" -#include +#include namespace oneapi::dal::backend::primitives { diff --git a/cpp/oneapi/dal/backend/primitives/lapack/misc.hpp b/cpp/oneapi/dal/backend/primitives/lapack/misc.hpp index 62cf57fe6be..7893d7523b9 100644 --- a/cpp/oneapi/dal/backend/primitives/lapack/misc.hpp +++ b/cpp/oneapi/dal/backend/primitives/lapack/misc.hpp @@ -18,11 +18,11 @@ #include "oneapi/dal/backend/primitives/ndarray.hpp" -#include +#include namespace oneapi::dal::backend::primitives { -namespace mkl = oneapi::fpk; +namespace mkl = oneapi::mkl; inline constexpr mkl::job ident_job(mkl::job order) { constexpr auto novec = mkl::job::novec; diff --git a/cpp/oneapi/dal/backend/primitives/lapack/syevd.hpp b/cpp/oneapi/dal/backend/primitives/lapack/syevd.hpp new file mode 100644 index 00000000000..dbed4c9f84c --- /dev/null +++ b/cpp/oneapi/dal/backend/primitives/lapack/syevd.hpp @@ -0,0 +1,39 @@ +/******************************************************************************* +* Copyright contributors to the oneDAL project +* +* Licensed under the Apache License, Version 2.0 (the "License"); +* you may not use this file except in 
compliance with the License. +* You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. +*******************************************************************************/ + +#pragma once + +#include "oneapi/dal/backend/primitives/ndarray.hpp" +#include "oneapi/dal/backend/primitives/blas/misc.hpp" +#include "oneapi/dal/backend/primitives/lapack/misc.hpp" + +namespace oneapi::dal::backend::primitives { + +#ifdef ONEDAL_DATA_PARALLEL + +namespace mkl = oneapi::mkl; + +template +sycl::event syevd(sycl::queue& queue, + std::int64_t column_count, + ndview& a, + std::int64_t lda, + ndview& eigenvalues, + const event_vector& deps = {}); + +#endif + +} // namespace oneapi::dal::backend::primitives diff --git a/cpp/oneapi/dal/backend/primitives/lapack/syevd_dpc.cpp b/cpp/oneapi/dal/backend/primitives/lapack/syevd_dpc.cpp new file mode 100644 index 00000000000..cbb3e06a779 --- /dev/null +++ b/cpp/oneapi/dal/backend/primitives/lapack/syevd_dpc.cpp @@ -0,0 +1,96 @@ +/******************************************************************************* +* Copyright contributors to the oneDAL project +* +* Licensed under the Apache License, Version 2.0 (the "License"); +* you may not use this file except in compliance with the License. +* You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+* See the License for the specific language governing permissions and +* limitations under the License. +*******************************************************************************/ + +#include "oneapi/dal/detail/profiler.hpp" +#include "oneapi/dal/backend/primitives/lapack/syevd.hpp" +#include "oneapi/dal/backend/primitives/blas/misc.hpp" +#include "oneapi/dal/backend/primitives/ndarray.hpp" +#include + +namespace oneapi::dal::backend::primitives { + +template +static sycl::event syevd_wrapper(sycl::queue& queue, + mkl::job jobz, + mkl::uplo uplo, + std::int64_t column_count, + Float* data_ptr, + std::int64_t lda, + Float* eigenvalues, + Float* scratchpad, + std::int64_t scratchpad_size, + const event_vector& deps) { + ONEDAL_ASSERT(lda >= column_count); + + return mkl::lapack::syevd(queue, + jobz, + uplo, + column_count, + data_ptr, + lda, + eigenvalues, + scratchpad, + scratchpad_size, + deps); +} + +template +sycl::event syevd(sycl::queue& queue, + std::int64_t column_count, + ndview& a, + std::int64_t lda, + ndview& eigenvalues, + const event_vector& deps) { + constexpr auto job = ident_job(jobz); + constexpr auto ul = ident_uplo(uplo); + + const auto scratchpad_size = + mkl::lapack::syevd_scratchpad_size(queue, jobz, uplo, column_count, lda); + auto scratchpad = + ndarray::empty(queue, { scratchpad_size }, sycl::usm::alloc::device); + + return syevd_wrapper(queue, + job, + ul, + column_count, + a.get_mutable_data(), + lda, + eigenvalues.get_mutable_data(), + scratchpad.get_mutable_data(), + scratchpad_size, + deps); +} + +#define INSTANTIATE(jobz, uplo, F) \ + template ONEDAL_EXPORT sycl::event syevd(sycl::queue & queue, \ + std::int64_t n, \ + ndview & a, \ + std::int64_t lda, \ + ndview & w, \ + const event_vector& deps); + +#define INSTANTIATE_FLOAT(jobz, uplo) \ + INSTANTIATE(jobz, uplo, float) \ + INSTANTIATE(jobz, uplo, double) + +#define INSTANTIATE_JOB(uplo) \ + INSTANTIATE_FLOAT(mkl::job::novec, uplo) \ + INSTANTIATE_FLOAT(mkl::job::vec, uplo) + 
+INSTANTIATE_JOB(mkl::uplo::upper) +INSTANTIATE_JOB(mkl::uplo::lower) + +} // namespace oneapi::dal::backend::primitives diff --git a/cpp/oneapi/dal/backend/primitives/lapack/test/eigen.cpp b/cpp/oneapi/dal/backend/primitives/lapack/test/syevd_dpc.cpp similarity index 59% rename from cpp/oneapi/dal/backend/primitives/lapack/test/eigen.cpp rename to cpp/oneapi/dal/backend/primitives/lapack/test/syevd_dpc.cpp index 6a6a0b44bbc..56484014a81 100644 --- a/cpp/oneapi/dal/backend/primitives/lapack/test/eigen.cpp +++ b/cpp/oneapi/dal/backend/primitives/lapack/test/syevd_dpc.cpp @@ -14,11 +14,11 @@ * limitations under the License. *******************************************************************************/ -#include "oneapi/dal/backend/primitives/lapack/eigen.hpp" +#include "oneapi/dal/backend/primitives/lapack/syevd.hpp" #include "oneapi/dal/test/engine/common.hpp" #include "oneapi/dal/test/engine/math.hpp" -#include "oneapi/dal/test/engine/io.hpp" +#include "oneapi/dal/test/engine/fixtures.hpp" namespace oneapi::dal::backend::primitives::test { @@ -26,8 +26,9 @@ namespace te = dal::test::engine; namespace la = te::linalg; template -class sym_eigvals_test { +class syevd_test : public te::float_algo_fixture { public: + using float_t = Float; std::int64_t generate_dim() const { return GENERATE(3, 28, 125, 256); } @@ -47,23 +48,6 @@ class sym_eigvals_test { return call_sym_eigvals_inplace_generic(symmetric_matrix, is_ascending); } - auto call_sym_eigvals_descending(const la::matrix& symmetric_matrix, - std::int64_t eigval_count) { - ONEDAL_ASSERT(symmetric_matrix.get_row_count() == symmetric_matrix.get_column_count()); - - const std::int64_t dim = symmetric_matrix.get_row_count(); - const auto s_copy_flat = symmetric_matrix.copy().get_array(); - - auto data_or_scratchpad_nd = ndarray::wrap_mutable(s_copy_flat, { dim, dim }); - auto eigvecs_nd = ndarray::empty({ eigval_count, dim }); - auto eigvals_nd = ndarray::empty(eigval_count); - 
sym_eigvals_descending(data_or_scratchpad_nd, eigval_count, eigvecs_nd, eigvals_nd); - - const auto eigvecs = la::matrix::wrap_nd(eigvecs_nd); - const auto eigvals = la::matrix::wrap_nd(eigvals_nd); - return std::make_tuple(eigvecs, eigvals); - } - auto call_sym_eigvals_inplace_generic(const la::matrix& symmetric_matrix, bool is_ascending) { ONEDAL_ASSERT(symmetric_matrix.get_row_count() == symmetric_matrix.get_column_count()); @@ -72,17 +56,51 @@ class sym_eigvals_test { const auto s_copy_flat = symmetric_matrix.copy().get_array(); auto data_or_eigenvectors_nd = ndarray::wrap_mutable(s_copy_flat, { dim, dim }); - auto eigenvalues_nd = ndarray::empty(dim); + data_or_eigenvectors_nd.to_device(this->get_queue()); + auto eigenvalues_nd = + ndarray::empty(this->get_queue(), { dim }, sycl::usm::alloc::device); if (is_ascending) { - sym_eigvals(data_or_eigenvectors_nd, eigenvalues_nd); + auto syevd_event = syevd(this->get_queue(), + dim, + data_or_eigenvectors_nd, + dim, + eigenvalues_nd, + {}); + syevd_event.wait_and_throw(); + const auto eigenvectors = + la::matrix::wrap_nd(data_or_eigenvectors_nd.to_host(this->get_queue())); + const auto eigenvalues = + la::matrix::wrap_nd(eigenvalues_nd.to_host(this->get_queue())); + return std::make_tuple(eigenvectors, eigenvalues); } else { - sym_eigvals_descending(data_or_eigenvectors_nd, eigenvalues_nd); + auto syevd_event = syevd(this->get_queue(), + dim, + data_or_eigenvectors_nd, + dim, + eigenvalues_nd, + {}); + syevd_event.wait_and_throw(); + + auto data_ptr = eigenvalues_nd.get_data(); + auto flipped_eigenvalues = + ndarray::empty(this->get_queue(), { dim }, sycl::usm::alloc::device); + auto flipped_eigenvalues_ptr = flipped_eigenvalues.get_mutable_data(); + auto queue = this->get_queue(); + auto flip_event = queue.submit([&](sycl::handler& h) { + const auto range = make_range_1d(dim); + h.depends_on({ syevd_event }); + h.parallel_for(range, [=](sycl::id<1> id) { + const std::int64_t col = id[0]; + 
flipped_eigenvalues_ptr[col] = data_ptr[(dim - 1) - col]; + }); + }); + const auto eigenvectors = + la::matrix::wrap_nd(data_or_eigenvectors_nd.to_host(this->get_queue())); + const auto eigenvalues = + la::matrix::wrap_nd(flipped_eigenvalues.to_host(this->get_queue())); + return std::make_tuple(eigenvectors, eigenvalues); } - - const auto eigenvectors = la::matrix::wrap_nd(data_or_eigenvectors_nd); - const auto eigenvalues = la::matrix::wrap_nd(eigenvalues_nd); - return std::make_tuple(eigenvectors, eigenvalues); } void check_eigvals_definition(const la::matrix& s, @@ -132,38 +150,21 @@ class sym_eigvals_test { static constexpr int seed_ = 7777; }; -using eigen_types = COMBINE_TYPES((float, double)); - -#define SYM_EIGVALS_TEST(name) \ - TEMPLATE_LIST_TEST_M(sym_eigvals_test, name, "[sym_eigvals]", eigen_types) +using eigen_types = COMBINE_TYPES((float)); -SYM_EIGVALS_TEST("check inplace sym_eigvals on symmetric positive-definite matrix") { +TEMPLATE_LIST_TEST_M(syevd_test, "test syevd with pos def matrix", "[sym_eigvals]", eigen_types) { const auto s = this->generate_symmetric_positive(); - const auto [eigenvectors, eigenvalues] = this->call_sym_eigvals_inplace(s); this->check_eigvals_definition(s, eigenvectors, eigenvalues); this->check_eigvals_are_ascending(eigenvalues); } -SYM_EIGVALS_TEST("check inplace sym_eigvals_descending on symmetric positive-definite matrix") { +TEMPLATE_LIST_TEST_M(syevd_test, "test syevd with pos def matrix 2", "[sym_eigvals]", eigen_types) { const auto s = this->generate_symmetric_positive(); const auto [eigenvectors, eigenvalues] = this->call_sym_eigvals_inplace_descending(s); - this->check_eigvals_definition(s, eigenvectors, eigenvalues); - this->check_eigvals_are_descending(eigenvalues); -} - -SYM_EIGVALS_TEST("check sym_eigvals_descending on symmetric positive-definite matrix") { - const auto s = this->generate_symmetric_positive(); - const std::int64_t eigvals_count = GENERATE_COPY(1, s.get_row_count() / 2, s.get_row_count()); - 
- const auto [eigenvectors, eigenvalues] = this->call_sym_eigvals_descending(s, eigvals_count); - - REQUIRE(eigenvectors.get_row_count() == eigvals_count); - REQUIRE(eigenvalues.get_count() == eigvals_count); - this->check_eigvals_definition(s, eigenvectors, eigenvalues); this->check_eigvals_are_descending(eigenvalues); } diff --git a/cpp/oneapi/dal/backend/primitives/sparse_blas/misc.hpp b/cpp/oneapi/dal/backend/primitives/sparse_blas/misc.hpp index 8a475db7cb1..8fe574a36be 100644 --- a/cpp/oneapi/dal/backend/primitives/sparse_blas/misc.hpp +++ b/cpp/oneapi/dal/backend/primitives/sparse_blas/misc.hpp @@ -18,11 +18,11 @@ #include "oneapi/dal/table/common.hpp" -#include +#include namespace oneapi::dal::backend::primitives { -namespace mkl = oneapi::fpk; +namespace mkl = oneapi::mkl; /// Convert oneDAL `sparse_indexing` to oneMKL `index_base` inline constexpr mkl::index_base sparse_indexing_to_mkl(const sparse_indexing indexing) { diff --git a/cpp/oneapi/dal/detail/sparse_matrix_handle_impl.hpp b/cpp/oneapi/dal/detail/sparse_matrix_handle_impl.hpp index fb340e311a6..9f382b3dc2a 100644 --- a/cpp/oneapi/dal/detail/sparse_matrix_handle_impl.hpp +++ b/cpp/oneapi/dal/detail/sparse_matrix_handle_impl.hpp @@ -18,13 +18,13 @@ #ifdef ONEDAL_DATA_PARALLEL -#include +#include namespace oneapi::dal::detail { namespace v1 { -namespace mkl = oneapi::fpk; +namespace mkl = oneapi::mkl; /// Class that hides the implementation details of the `backend::primitives::sparse_matrix_handle` class class sparse_matrix_handle_impl { diff --git a/cpp/oneapi/dal/test/engine/mkl/BUILD b/cpp/oneapi/dal/test/engine/mkl/BUILD index 6bf0e21fcde..92ead4e03e0 100644 --- a/cpp/oneapi/dal/test/engine/mkl/BUILD +++ b/cpp/oneapi/dal/test/engine/mkl/BUILD @@ -13,7 +13,7 @@ dal_test_module( extra_deps = [{ "@config//:backend_ref": [ "@openblas//:openblas", ], - "//conditions:default": [ "@mkl//:mkl_seq", + "//conditions:default": [ "@mkl//:mkl_thr", ], }], ) diff --git a/dev/bazel/deps/micromkl.bzl 
b/dev/bazel/deps/micromkl.bzl deleted file mode 100644 index e06ce773cf5..00000000000 --- a/dev/bazel/deps/micromkl.bzl +++ /dev/null @@ -1,67 +0,0 @@ -#=============================================================================== -# Copyright 2020 Intel Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -#=============================================================================== - -load("@onedal//dev/bazel:repos.bzl", "repos") - -micromkl_repo = repos.prebuilt_libs_repo_rule( - includes = [ - "include", - "%{os}/include", - ], - libs = [ - "%{os}/lib/libdaal_mkl_thread.a", - "%{os}/lib/libdaal_vmlipp_core.a", - ], - build_template = "@onedal//dev/bazel/deps:micromkl.tpl.BUILD", - download_mapping = { - # Required directory layout and layout in the downloaded - # archives may be different. Mapping helps to setup relations - # between required layout (LHS) and downloaded (RHS). - # In this case, files from `lib/*` will be copied to `lib/intel64/*`. - "lib/": "lib/intel64/", - }, - local_mapping = { - # Required directory layout and layout in the downloaded - # archives may be different. Mapping helps to setup relations - # between required layout (LHS) and downloaded (RHS). - # In this case, files from `lib/*` will be copied to `lib/intel64/*`. 
- "lib/": "lib/intel64/", - }, -) - -micromkl_dpc_repo = repos.prebuilt_libs_repo_rule( - includes = [ - "include", - ], - libs = [ - "lib/libdaal_sycl.a", - ], - build_template = "@onedal//dev/bazel/deps:micromkldpc.tpl.BUILD", - download_mapping = { - # Required directory layout and layout in the downloaded - # archives may be different. Mapping helps to setup relations - # between required layout (LHS) and downloaded (RHS). - # In this case, files from `lib/*` will be copied to `lib/intel64/*`. - "lib/": "lib/intel64/", - }, - local_mapping = { - # Required directory layout and layout in the downloaded - # archives may be different. Mapping helps to setup relations - # between required layout (LHS) and downloaded (RHS). - # In this case, files from `lib/*` will be copied to `lib/intel64/*`. - "lib/": "lib/intel64/", - }, -) diff --git a/dev/bazel/deps/micromkl.tpl.BUILD b/dev/bazel/deps/micromkl.tpl.BUILD deleted file mode 100644 index eef6d53297f..00000000000 --- a/dev/bazel/deps/micromkl.tpl.BUILD +++ /dev/null @@ -1,27 +0,0 @@ -package(default_visibility = ["//visibility:public"]) - -cc_library( - name = "headers", - hdrs = glob(["include/*.h", "%{os}/include/*.h"]), - includes = [ "include", "%{os}/include" ], -) - -cc_library( - name = "vml_ipp", - srcs = [ - "%{os}/lib/libdaal_vmlipp_core.a", - ], - deps = [ - ":headers", - ], -) - -cc_library( - name = "mkl_thr", - srcs = [ - "%{os}/lib/libdaal_mkl_thread.a", - ], - deps = [ - ":headers", - ], -) diff --git a/dev/bazel/deps/micromkldpc.tpl.BUILD b/dev/bazel/deps/micromkldpc.tpl.BUILD deleted file mode 100644 index 844e1150264..00000000000 --- a/dev/bazel/deps/micromkldpc.tpl.BUILD +++ /dev/null @@ -1,18 +0,0 @@ -package(default_visibility = ["//visibility:public"]) - -cc_library( - name = "headers", - hdrs = glob(["include/*.h", "include/*.hpp"]), - includes = [ "include" ], -) - -cc_library( - name = "mkl_dpc", - srcs = [ - "lib/libdaal_sycl.a", - ], - deps = [ - ":headers", - "@opencl//:opencl_binary", 
- ], -) diff --git a/dev/bazel/deps/mkl.bzl b/dev/bazel/deps/mkl.bzl index e5e0cf15a1f..8fda1bbfa34 100644 --- a/dev/bazel/deps/mkl.bzl +++ b/dev/bazel/deps/mkl.bzl @@ -22,8 +22,9 @@ mkl_repo = repos.prebuilt_libs_repo_rule( ], libs = [ "lib/libmkl_core.a", - "lib/libmkl_sequential.a", "lib/libmkl_intel_ilp64.a", + "lib/libmkl_tbb_thread.a", + "lib/libmkl_sycl.a", ], build_template = "@onedal//dev/bazel/deps:mkl.tpl.BUILD", download_mapping = { @@ -33,11 +34,4 @@ mkl_repo = repos.prebuilt_libs_repo_rule( # In this case, files from `lib/*` will be copied to `lib/intel64/*`. "lib/intel64": "lib/", }, - local_mapping = { - # Required directory layout and layout in the downloaded - # archives may be different. Mapping helps to setup relations - # between required layout (LHS) and downloaded (RHS). - # In this case, files from `lib/*` will be copied to `lib/intel64/*`. - "lib/": "lib/intel64/", - }, ) diff --git a/dev/bazel/deps/mkl.tpl.BUILD b/dev/bazel/deps/mkl.tpl.BUILD index 0d744544d4e..c179f5dfd41 100644 --- a/dev/bazel/deps/mkl.tpl.BUILD +++ b/dev/bazel/deps/mkl.tpl.BUILD @@ -2,8 +2,13 @@ package(default_visibility = ["//visibility:public"]) cc_library( name = "headers", - hdrs = glob(["include/**/*.h"]), - includes = [ "include" ], + hdrs = glob([ + "include/**/*.h", + "include/**/*.hpp", + ]), + includes = [ + "include", + ], defines = [ "MKL_ILP64" ], @@ -13,38 +18,34 @@ cc_library( name = "mkl_core", srcs = [ "lib/libmkl_core.a", + "lib/libmkl_intel_ilp64.a", + "lib/libmkl_tbb_thread.a", ], linkopts = [ "-lpthread", ], -) - -cc_library( - name = "mkl_intel_ilp64", - srcs = [ - "lib/libmkl_intel_ilp64.a", - ], deps = [ - ":mkl_core", + ":headers", ] ) cc_library( - name = "libmkl_sequential", - srcs = [ - "lib/libmkl_sequential.a", + name = "mkl_thr", + linkopts = [ + "-lpthread", ], deps = [ + ":headers", ":mkl_core", ] ) cc_library( - name = "mkl_seq", + name = "mkl_dpc", + srcs = [ + "lib/libmkl_sycl.a", + ], deps = [ ":headers", - ":mkl_core", - 
":mkl_intel_ilp64", - ":libmkl_sequential", ], ) diff --git a/dev/docker/onedal-dev.Dockerfile b/dev/docker/onedal-dev.Dockerfile index fb6c02394cc..46c5681f716 100644 --- a/dev/docker/onedal-dev.Dockerfile +++ b/dev/docker/onedal-dev.Dockerfile @@ -40,6 +40,9 @@ RUN .ci/env/apt.sh dev-base # Installing environment for DPCPP development dependencies RUN .ci/env/apt.sh dpcpp +# Installing environment for MKL development dependencies +RUN .ci/env/apt.sh mkl + # Installing environment for clang-format RUN .ci/env/apt.sh clang-format @@ -51,9 +54,6 @@ RUN wget https://github.com/bazelbuild/bazelisk/releases/download/v1.18.0/bazeli # Installing openBLAS dependency RUN .ci/env/openblas.sh -# Installing MKL dependency -RUN ./dev/download_micromkl.sh - # Installing oneTBB dependency RUN ./dev/download_tbb.sh diff --git a/dev/download_micromkl.bat b/dev/download_micromkl.bat deleted file mode 100755 index a38515735a5..00000000000 --- a/dev/download_micromkl.bat +++ /dev/null @@ -1,77 +0,0 @@ -@echo off -rem ============================================================================ -rem Copyright 2018 Intel Corporation -rem -rem Licensed under the Apache License, Version 2.0 (the "License"); -rem you may not use this file except in compliance with the License. -rem You may obtain a copy of the License at -rem -rem http://www.apache.org/licenses/LICENSE-2.0 -rem -rem Unless required by applicable law or agreed to in writing, software -rem distributed under the License is distributed on an "AS IS" BASIS, -rem WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -rem See the License for the specific language governing permissions and -rem limitations under the License. 
-rem ============================================================================ - -rem req: PowerShell 3.0+ -powershell.exe -command "if ($PSVersionTable.PSVersion.Major -ge 3) {exit 1} else {Write-Host \"The script requires PowerShell 3.0 or above (current version: $($PSVersionTable.PSVersion.Major).$($PSVersionTable.PSVersion.Minor))\"}" && goto Error_load - -set MKLURLROOT=https://github.com/oneapi-src/oneDAL/releases/download/Dependencies/ -set MKLVERSION=20230413 -set MKLGPUVERSION=20240605 - -set MKLPACKAGE=mklfpk_win_%MKLVERSION% -set MKLGPUPACKAGE=mklgpufpk_win_%MKLGPUVERSION% - -set MKLURL=%MKLURLROOT%%MKLPACKAGE%.zip -set MKLGPUURL=%MKLURLROOT%%MKLGPUPACKAGE%.zip -if /i "%1"=="" ( - set CPUCOND=%~dp0..\__deps\mklfpk - set GPUCOND=%~dp0..\__deps\mklgpufpk -) else ( - set CPUCOND=%1\..\__deps\mklfpk - set GPUCOND=%1\..\__deps\mklgpufpk -) - -set CPUDST=%CPUCOND% -set GPUDST="%GPUCOND%\win" - -CALL :Download_FPK %CPUDST% , %CPUCOND% , %MKLURL% , %MKLPACKAGE% -CALL :Download_FPK %GPUDST% , %GPUCOND% , %MKLGPUURL% , %MKLGPUPACKAGE% - -exit /B 0 - -:Download_FPK - -set DST=%~1 -set CONDITION=%~2 -set SRC=%~3 -set FILENAME=%~4 - -if not exist %DST% mkdir %DST% - - -if not exist "%CONDITION%\win\lib" ( - - powershell.exe -command "(New-Object System.Net.WebClient).DownloadFile('%SRC%', '%DST%\%FILENAME%.zip')" && goto Unpack || goto Error_load - -:Unpack - powershell.exe -command "if (Get-Command Add-Type -errorAction SilentlyContinue) {Add-Type -Assembly \"System.IO.Compression.FileSystem\"; try { [IO.Compression.zipfile]::ExtractToDirectory(\"%DST%\%FILENAME%.zip\", \"%DST%\")}catch{$_.exception ; exit 1}} else {exit 1}" && goto Exit || goto Error_unpack - -:Error_load - echo download_mklfpk.bat : Error: Failed to load %SRC% to %DST%, try to load it manually - exit /B 1 - -:Error_unpack - echo download_mklfpk.bat : Error: Failed to unpack %DST%\%FILENAME%.zip to %DST%, try unpack the archive manually - exit /B 1 - -:Exit - echo Downloaded and unpacked 
Intel^(R^) MKL small libraries to %DST% - exit /B 0 -) else ( - echo Intel^(R^) MKL small libraries are already installed in %DST% - exit /B 0 -) diff --git a/dev/download_micromkl.sh b/dev/download_micromkl.sh deleted file mode 100755 index 6eb52ddca76..00000000000 --- a/dev/download_micromkl.sh +++ /dev/null @@ -1,106 +0,0 @@ -#!/bin/bash -#=============================================================================== -# Copyright 2018 Intel Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -#=============================================================================== - -MKLFPK_URL_ROOT="https://github.com/oneapi-src/oneDAL/releases/download/Dependencies/" -MKLFPK_VERSION="20230413" -MKLFPK_VERSION_MAC="20210426" -MKLGPUFPK_VERSION="20240605" -WITH_GPU=true - -while true ; do - if [ "$1" = "--help" ] ; then - echo "Usage: $0 [with_gpu=true|false]" - echo "Usage example: $0 with_gpu=true" - exit 1 - elif [ "${1:0:8}" = "with_gpu" ] ; then - WITH_GPU=${1:9} - elif [ -z "$1" ] ; then - break - else - echo "Error: unknown paramater $1!" - echo "type $0 --help" - exit 1 - fi - shift -done - -function download_fpk() -{ - SRC=$1 - DST=$2 - CONDITION=$3 - FILENAME=$4 - - mkdir -p "${DST}" - DST=$(cd "${DST}" || exit 1;pwd) - - if [ ! 
-e "${CONDITION}/${MKLFPK_OS}/lib/" ]; then - if [ -x "$(command -v curl)" ]; then - echo curl -L -o "${DST}/${FILENAME}" "${SRC}" - if curl -L -o "${DST}/${FILENAME}" "${SRC}"; - then - DOWNLOAD_CODE=0 - fi - elif [ -x "$(command -v wget)" ]; then - echo wget -O "${DST}/${FILENAME}" "${SRC}" - if wget -O "${DST}/${FILENAME}" "${SRC}"; - then - DOWNLOAD_CODE=0 - fi - else - echo "curl or wget not available" - exit 1 - fi - - if [ ${DOWNLOAD_CODE} -ne 0 ] || [ ! -e "${DST}/${FILENAME}" ]; then - echo "Download from ${SRC} to ${DST}/${FILENAME} failed" - exit 1 - fi - set -x - - echo tar -xf "${DST}/${FILENAME}" -C "${DST}" - tar -xf "${DST}/${FILENAME}" -C "${DST}" - echo "Downloaded and unpacked oneMKL small libraries to ${DST}" - else - echo "oneMKL small libraries are already installed in ${DST}" - fi -} - -os=$(uname) -if [ "$os" = "Linux" ]; then - MKLFPK_OS=lnx -elif [ "$os" = "Darwin" ]; then - MKLFPK_OS=mac - MKLFPK_VERSION=${MKLFPK_VERSION_MAC} -else - echo "Cannot identify operating system. Try downloading package manually." 
- exit 1 -fi - -MKLFPK_PACKAGE="mklfpk_${MKLFPK_OS}_${MKLFPK_VERSION}" -MKLGPUFPK_PACKAGE="mklgpufpk_${MKLFPK_OS}_${MKLGPUFPK_VERSION}" -MKLFPK_URL=${MKLFPK_URL_ROOT}${MKLFPK_PACKAGE}.tgz -MKLGPUFPK_URL=${MKLFPK_URL_ROOT}${MKLGPUFPK_PACKAGE}.tgz -CPUCOND=$(dirname "$0")/../__deps/mklfpk -GPUCOND=$(dirname "$0")/../__deps/mklgpufpk -CPUDST="${CPUCOND}" -GPUDST="${GPUCOND}/${MKLFPK_OS}" - -download_fpk "${MKLFPK_URL}" "${CPUDST}" "${CPUCOND}" "${MKLFPK_PACKAGE}.tgz" -if [ "${MKLFPK_OS}" != "mac" ] && [ "${WITH_GPU}" == "true" ]; then - download_fpk "${MKLGPUFPK_URL}" "${GPUDST}" "${GPUCOND}" "${MKLGPUFPK_PACKAGE}.tgz" -fi diff --git a/dev/make/deps.mkl.mk b/dev/make/deps.mkl.mk index c533d9fbb78..2c490d1bb66 100644 --- a/dev/make/deps.mkl.mk +++ b/dev/make/deps.mkl.mk @@ -17,44 +17,44 @@ #++ # Math backend (MKL) definitions for makefile #-- -MKLFPKDIR:= $(if $(wildcard $(DIR)/__deps/mklfpk/$(_OS)/*),$(DIR)/__deps/mklfpk, \ - $(if $(wildcard $(MKLFPKROOT)/include/*),$(subst \,/,$(MKLFPKROOT)), \ - $(error Can`t find MKLFPK libs nether in $(DIR)/__deps/mklfpk/$(_OS) not in MKLFPKROOT.))) -MKLFPKDIR.include := $(MKLFPKDIR)/include $(MKLFPKDIR)/$(if $(OS_is_fbsd),lnx,$(_OS))/include -MKLFPKDIR.libia := $(MKLFPKDIR)/$(if $(OS_is_fbsd),lnx,$(_OS))/lib/$(_IA) -RELEASEDIR.include.mklgpufpk := $(RELEASEDIR.include)/services/internal/sycl/math +MKLDIR:= $(subst \,/,$(MKLROOT)) +MKLDIR.include := $(MKLDIR)/include +MKLDIR.libia := $(MKLDIR)/lib +RELEASEDIR.include.mklgpu := $(RELEASEDIR.include)/services/internal/sycl/math -MKLGPUFPKDIR:= $(if $(wildcard $(DIR)/__deps/mklgpufpk/$(_OS)/*),$(DIR)/__deps/mklgpufpk/$(_OS),$(subst \,/,$(MKLGPUFPKROOT))) -MKLGPUFPKDIR.include := $(MKLGPUFPKDIR)/include -MKLGPUFPKDIR.lib := $(MKLGPUFPKDIR)/lib/ +MKLGPUDIR:= $(subst \,/,$(MKLROOT)) +MKLGPUDIR.include := $(MKLGPUDIR)/include/oneapi +MKLGPUDIR.lib := $(MKLGPUDIR)/lib -mklgpufpk.LIBS_A := $(MKLGPUFPKDIR.lib)/$(plib)daal_sycl$d.$(a) -mklgpufpk.HEADERS := 
$(MKLGPUFPKDIR.include)/mkl_dal_sycl.hpp $(MKLGPUFPKDIR.include)/mkl_dal_blas_sycl.hpp +mklgpu.HEADERS := $(MKLGPUDIR.include)/mkl.hpp -daaldep.math_backend.incdir := $(MKLFPKDIR.include) $(MKLGPUFPKDIR.include) -daaldep.math_backend_oneapi.incdir := $(MKLFPKDIR.include) $(MKLGPUFPKDIR.include) +daaldep.math_backend.incdir := $(MKLDIR.include) +daaldep.math_backend_oneapi.incdir := $(MKLDIR.include) $(MKLGPUDIR.include) -daaldep.lnx32e.mkl.thr := $(MKLFPKDIR.libia)/$(plib)daal_mkl_thread.$a -daaldep.lnx32e.mkl.seq := $(MKLFPKDIR.libia)/$(plib)daal_mkl_sequential.$a -daaldep.lnx32e.mkl := $(MKLFPKDIR.libia)/$(plib)daal_vmlipp_core.$a +daaldep.lnx32e.mkl.thr := $(MKLDIR.libia)/$(plib)mkl_tbb_thread.$a +daaldep.lnx32e.mkl.seq := $(MKLDIR.libia)/$(plib)mkl_sequential.$a +daaldep.lnx32e.mkl.core := $(MKLDIR.libia)/$(plib)mkl_core.$a +daaldep.lnx32e.mkl.interfaces := $(MKLDIR.libia)/$(plib)mkl_intel_ilp64.$a +daaldep.lnx32e.mkl.sycl := $(MKLGPUDIR.lib)/$(plib)mkl_sycl.$a -daaldep.win32e.mkl.thr := $(MKLFPKDIR.libia)/daal_mkl_thread$d.$a -daaldep.win32e.mkl.seq := $(MKLFPKDIR.libia)/daal_mkl_sequential.$a -daaldep.win32e.mkl := $(MKLFPKDIR.libia)/$(plib)daal_vmlipp_core$d.$a +daaldep.win32e.mkl.thr := $(MKLDIR.libia)/mkl_tbb_thread$d.$a +daaldep.win32e.mkl.seq := $(MKLDIR.libia)/mkl_sequential.$a +daaldep.win32e.mkl.interfaces := $(MKLDIR.libia)/mkl_intel_ilp64.$a +daaldep.win32e.mkl.core := $(MKLDIR.libia)/mkl_core.$a +daaldep.win32e.mkl.sycl := $(MKLGPUDIR.lib)/mkl_sycl$d.$a -daaldep.mac32e.mkl.thr := $(MKLFPKDIR.libia)/$(plib)daal_mkl_thread.$a -daaldep.mac32e.mkl.seq := $(MKLFPKDIR.libia)/$(plib)daal_mkl_sequential.$a -daaldep.mac32e.mkl := $(MKLFPKDIR.libia)/$(plib)daal_vmlipp_core.$a +daaldep.fbsd32e.mkl.thr := $(MKLDIR.libia)/$(plib)mkl_tbb_thread.$a +daaldep.fbsd32e.mkl.seq := $(MKLDIR.libia)/$(plib)mkl_sequential.$a +daaldep.fbsd32e.mkl.interfaces := $(MKLDIR.libia)/$(plib)mkl_intel_ilp64.$a +daaldep.fbsd32e.mkl.core := $(MKLDIR.libia)/$(plib)mkl_core.$a 
+daaldep.fbsd32e.mkl.sycl := $(MKLGPUDIR.lib)/$(plib)mkl_sycl.$a -daaldep.fbsd32e.mkl.thr := $(MKLFPKDIR.libia)/$(plib)daal_mkl_thread.$a -daaldep.fbsd32e.mkl.seq := $(MKLFPKDIR.libia)/$(plib)daal_mkl_sequential.$a -daaldep.fbsd32e.mkl := $(MKLFPKDIR.libia)/$(plib)daal_vmlipp_core.$a - - -daaldep.mkl := $(daaldep.$(PLAT).mkl) +daaldep.math_backend.core := $(daaldep.$(PLAT).mkl.core) +daaldep.math_backend.interfaces := $(daaldep.$(PLAT).mkl.interfaces) daaldep.math_backend.thr := $(daaldep.$(PLAT).mkl.thr) -daaldep.math_backend.seq := $(daaldep.$(PLAT).mkl.seq) $(daaldep.mkl) +daaldep.math_backend.seq := $(daaldep.$(PLAT).mkl.seq) +daaldep.math_backend.sycl := $(daaldep.$(PLAT).mkl.sycl) daaldep.lnx32e.vml := daaldep.lnx32e.ipp := $(if $(COV.libia),$(COV.libia)/libcov.a) @@ -71,4 +71,4 @@ daaldep.fbsd32e.ipp := $(if $(COV.libia),$(COV.libia)/libcov.a) daaldep.vml := $(daaldep.$(PLAT).vml) daaldep.ipp := $(daaldep.$(PLAT).ipp) -daaldep.math_backend.ext := $(daaldep.ipp) $(daaldep.vml) $(daaldep.mkl) +daaldep.math_backend.ext := $(daaldep.ipp) $(daaldep.vml) $(daaldep.math_backend.interfaces) $(daaldep.math_backend.thr) $(daaldep.math_backend.core) diff --git a/dev/make/deps.ref.mk b/dev/make/deps.ref.mk index 81a65cb7d5e..7d240eb8f8d 100644 --- a/dev/make/deps.ref.mk +++ b/dev/make/deps.ref.mk @@ -30,3 +30,6 @@ daaldep.math_backend.seq := $(OPENBLASDIR.libia)/libopenblas.$a daaldep.math_backend.incdir := $(OPENBLASDIR.include) daaldep.math_backend_oneapi.incdir := $(OPENBLASDIR.include) +daaldep.math_backend.ext := $(daaldep.math_backend.thr) +daaldep.math_backend.sycl := $(daaldep.math_backend.thr) +daaldep.math_backend.oneapi := $(daaldep.math_backend.thr) diff --git a/makefile b/makefile index d4d3ee6bc55..9a9c8be5e6a 100644 --- a/makefile +++ b/makefile @@ -120,6 +120,7 @@ y := $(notdir $(filter $(_OS)/%,lnx/so win/dll mac/dylib)) -Q := $(if $(OS_is_win),$(if $(COMPILER_is_vc),-,-Q),-) -cxx17 := $(if 
$(COMPILER_is_vc),/std:c++17,$(-Q)std=c++17) -fPIC := $(if $(OS_is_win),,-fPIC) +-DMKL_ILP64 := $(if $(filter mkl,$(BACKEND_CONFIG)),-DMKL_ILP64) -Zl := $(-Zl.$(COMPILER)) -DEBC := $(if $(REQDBG),$(-DEBC.$(COMPILER)) -DDEBUG_ASSERT -DONEDAL_ENABLE_ASSERT) -DTBB_SUPPRESS_DEPRECATED_MESSAGES -D__TBB_LEGACY_MODE -DEBJ := $(if $(REQDBG),-g,-g:none) @@ -271,15 +272,7 @@ releasetbb.LIBS_Y := $(TBBDIR.soia)/$(plib)tbb$(if $(OS_is_win),12$(dtbb),).$(y) $(if $(wildcard $(TBBDIR.soia)/libtbbmalloc.2.dylib),$(wildcard $(TBBDIR.soia)/libtbbmalloc.2.dylib))) -#============================= Micromkl folders ===================================== -RELEASEDIR.include.mklgpufpk := $(RELEASEDIR.include)/services/internal/sycl/math - -MKLGPUFPKDIR:= $(if $(wildcard $(DIR)/__deps/mklgpufpk/$(_OS)/*),$(DIR)/__deps/mklgpufpk/$(_OS),$(subst \,/,$(MKLGPUFPKROOT))) -MKLGPUFPKDIR.include := $(MKLGPUFPKDIR)/include -MKLGPUFPKDIR.lib := $(MKLGPUFPKDIR)/lib - -mklgpufpk.LIBS_A := $(MKLGPUFPKDIR.lib)/$(plib)daal_sycl$d.$(a) -mklgpufpk.HEADERS := $(MKLGPUFPKDIR.include)/mkl_dal_sycl.hpp $(MKLGPUFPKDIR.include)/mkl_dal_blas_sycl.hpp +#============================= Math backend folders ===================================== include dev/make/deps.$(BACKEND_CONFIG).mk @@ -404,7 +397,7 @@ THR.srcdir := $(CPPDIR.daal)/src/threading CORE.srcdir := $(CPPDIR.daal)/src/algorithms EXTERNALS.srcdir := $(CPPDIR.daal)/src/externals -CORE.SERV.srcdir := $(CPPDIR.daal)/src/services +CORE.SERV.srcdir := $(subst \,/,$(CPPDIR.daal)/src/services) CORE.SERV.COMPILER.srcdir := $(CPPDIR.daal)/src/services/compiler/$(CORE.SERV.COMPILER.$(COMPILER)) CORE.srcdirs := $(CORE.SERV.srcdir) $(CORE.srcdir) \ @@ -473,7 +466,7 @@ $(WORKDIR.lib)/$(core_y): $(daaldep.math_backend.ext) \ $(CORE.tmpdir_y)/$(core_y:%.$y=%_link.txt) ; $(LINK.DYNAMIC) ; $(LINK.DYNAMIC.POST) $(CORE.objs_a): $(CORE.tmpdir_a)/inc_a_folders.txt -$(CORE.objs_a): COPT += $(-fPIC) $(-cxx17) $(-Zl) $(-DEBC) +$(CORE.objs_a): COPT += $(-fPIC) $(-cxx17) $(-Zl) 
$(-DEBC) $(-DMKL_ILP64) $(CORE.objs_a): COPT += -D__TBB_NO_IMPLICIT_LINKAGE -DDAAL_NOTHROW_EXCEPTIONS \ -DDAAL_HIDE_DEPRECATED -DTBB_USE_ASSERT=0 -D_ENABLE_ATOMIC_ALIGNMENT_FIX \ $(if $(CHECK_DLL_SIG),-DDAAL_CHECK_DLL_SIG) @@ -482,7 +475,7 @@ $(CORE.objs_a): COPT += @$(CORE.tmpdir_a)/inc_a_folders.txt $(eval $(call append_uarch_copt,$(CORE.objs_a))) $(CORE.objs_y): $(CORE.tmpdir_y)/inc_y_folders.txt -$(CORE.objs_y): COPT += $(-fPIC) $(-cxx17) $(-Zl) $(-DEBC) +$(CORE.objs_y): COPT += $(-fPIC) $(-cxx17) $(-Zl) $(-DEBC) $(-DMKL_ILP64) $(CORE.objs_y): COPT += -D__DAAL_IMPLEMENTATION \ -D__TBB_NO_IMPLICIT_LINKAGE -DDAAL_NOTHROW_EXCEPTIONS \ -DDAAL_HIDE_DEPRECATED -DTBB_USE_ASSERT=0 -D_ENABLE_ATOMIC_ALIGNMENT_FIX \ @@ -533,7 +526,7 @@ PARAMETERS.tmpdir_a.dpc := $(WORKDIR)/parameters_dpc_static PARAMETERS.tmpdir_y.dpc := $(WORKDIR)/parameters_dpc_dynamic ONEAPI.incdirs.common := $(CPPDIR) -ONEAPI.incdirs.thirdp := $(CORE.incdirs.common) $(daaldep.math_backend.incdir) $(TBBDIR.include) +ONEAPI.incdirs.thirdp := $(CORE.incdirs.common) $(daaldep.math_backend_oneapi.incdir) $(TBBDIR.include) ONEAPI.incdirs := $(ONEAPI.incdirs.common) $(CORE.incdirs.thirdp) $(ONEAPI.incdirs.thirdp) ONEAPI.dispatcher_cpu = $(WORKDIR)/oneapi/dal/_dal_cpu_dispatcher_gen.hpp @@ -665,7 +658,7 @@ $(eval $(call update_copt_from_dispatcher_tag,$(ONEAPI.objs_a.dpc),.dpcpp)) # Set compilation options to the object files which are part of DYNAMIC lib $(ONEAPI.objs_y): $(ONEAPI.dispatcher_cpu) $(ONEAPI.tmpdir_y)/inc_y_folders.txt -$(ONEAPI.objs_y): COPT += $(-fPIC) $(-cxx17) $(-Zl) $(-DEBC) $(-EHsc) $(pedantic.opts) \ +$(ONEAPI.objs_y): COPT += $(-fPIC) $(-cxx17) $(-Zl) $(-DMKL_ILP64) $(-DEBC) $(-EHsc) $(pedantic.opts) \ -DDAAL_NOTHROW_EXCEPTIONS \ -DDAAL_HIDE_DEPRECATED \ -D_ENABLE_ATOMIC_ALIGNMENT_FIX \ @@ -678,7 +671,7 @@ $(ONEAPI.objs_y): COPT += $(-fPIC) $(-cxx17) $(-Zl) $(-DEBC) $(-EHsc) $(pedantic $(eval $(call update_copt_from_dispatcher_tag,$(ONEAPI.objs_y))) $(ONEAPI.objs_y.dpc): 
$(ONEAPI.dispatcher_cpu) $(ONEAPI.tmpdir_y.dpc)/inc_y_folders.txt -$(ONEAPI.objs_y.dpc): COPT += $(-fPIC) $(-cxx17) $(-DEBC) $(-EHsc) $(pedantic.opts.dpcpp) \ +$(ONEAPI.objs_y.dpc): COPT += $(-fPIC) $(-cxx17) $(-DMKL_ILP64) $(-DEBC) $(-EHsc) $(pedantic.opts.dpcpp) \ -DDAAL_NOTHROW_EXCEPTIONS \ -DDAAL_HIDE_DEPRECATED \ -DONEDAL_DATA_PARALLEL \ @@ -771,7 +764,8 @@ $(WORKDIR.lib)/$(oneapi_y.dpc): LOPT += $(if $(REQDBG),-flink-huge-device-code,) $(WORKDIR.lib)/$(oneapi_y.dpc): LOPT += $(if $(OS_is_win),-IMPLIB:$(@:%.$(MAJORBINARY).dll=%_dll.lib),) $(WORKDIR.lib)/$(oneapi_y.dpc): LOPT += $(if $(OS_is_win),$(WORKDIR.lib)/$(core_y:%.$(MAJORBINARY).dll=%_dll.lib)) $(WORKDIR.lib)/$(oneapi_y.dpc): LOPT += $(if $(OS_is_win),sycl$d.lib OpenCL.lib) -$(WORKDIR.lib)/$(oneapi_y.dpc): LOPT += $(mklgpufpk.LIBS_A) +$(WORKDIR.lib)/$(oneapi_y.dpc): LOPT += $(daaldep.math_backend.sycl) + ifdef OS_is_win $(WORKDIR.lib)/$(oneapi_y.dpc:%.$(MAJORBINARY).dll=%_dll.lib): $(WORKDIR.lib)/$(oneapi_y.dpc) endif @@ -814,14 +808,14 @@ THR_TBB.objs_y := $(addprefix $(THR.tmpdir_y)/,$(THR.srcs:%.cpp=%_tbb.$o)) -include $(THR.tmpdir_y)/*.d $(WORKDIR.lib)/$(thr_tbb_a): LOPT:= -$(WORKDIR.lib)/$(thr_tbb_a): $(THR_TBB.objs_a) $(daaldep.math_backend.thr) ; $(LINK.STATIC) +$(WORKDIR.lib)/$(thr_tbb_a): $(THR_TBB.objs_a) ; $(LINK.STATIC) $(THR.tmpdir_y)/%_link.def: $(THR.srcdir)/$(daaldep.$(PLAT).threxport) | $(THR.tmpdir_y)/. 
$(daaldep.$(_OS).threxport.create) > $@ $(WORKDIR.lib)/$(thr_tbb_y): LOPT += $(-fPIC) $(daaldep.rt.thr) $(WORKDIR.lib)/$(thr_tbb_y): LOPT += $(if $(OS_is_win),-IMPLIB:$(@:%.dll=%_dll.lib),) -$(WORKDIR.lib)/$(thr_tbb_y): $(THR_TBB.objs_y) $(daaldep.math_backend.thr) $(if $(OS_is_win),$(THR.tmpdir_y)/dll_tbb.res,) $(THR.tmpdir_y)/$(thr_tbb_y:%.$y=%_link.def) ; $(LINK.DYNAMIC) ; $(LINK.DYNAMIC.POST) +$(WORKDIR.lib)/$(thr_tbb_y): $(THR_TBB.objs_y) $(if $(OS_is_win),$(THR.tmpdir_y)/dll_tbb.res,) $(THR.tmpdir_y)/$(thr_tbb_y:%.$y=%_link.def) ; $(LINK.DYNAMIC) ; $(LINK.DYNAMIC.POST) THR.objs_a := $(THR_TBB.objs_a) THR.objs_y := $(THR_TBB.objs_y) @@ -966,17 +960,17 @@ $(foreach x,$(release.PARAMETERS.LIBS_Y.dpc),$(eval $(call .release.y_win,$x,$(R endif endif -ifneq ($(MKLGPUFPKDIR),) +ifneq ($(MKLGPUDIR),) # Copies the file to the destination directory and renames daal -> onedal # $1: Path to the file to be copied # $2: Destination directory define .release.sycl.old -_release_common: $2/$(subst daal_sycl$d.$a,onedal_sycl$d.$a,$(notdir $1)) -$2/$(subst daal_sycl$d.$a,onedal_sycl$d.$a,$(notdir $1)): $(call frompf1,$1) | $2/. ; $(value cpy) +_release_common: $2/$(subst mkl_sycl$d.$a,onedal_sycl$d.$a,$(notdir $1)) +$2/$(subst mkl_sycl$d.$a,onedal_sycl$d.$a,$(notdir $1)): $(call frompf1,$1) | $2/. ; $(value cpy) endef -$(foreach t,$(mklgpufpk.HEADERS),$(eval $(call .release.sycl.old,$t,$(RELEASEDIR.include.mklgpufpk)))) -$(foreach t,$(mklgpufpk.LIBS_A), $(eval $(call .release.sycl.old,$t,$(RELEASEDIR.libia)))) +$(foreach t,$(mklgpu.HEADERS),$(eval $(call .release.sycl.old,$t,$(RELEASEDIR.include.mklgpu)))) +$(foreach t,$(daaldep.math_backend.sycl), $(eval $(call .release.sycl.old,$t,$(RELEASEDIR.libia)))) endif _release_c: ./deploy/pkg-config/pkg-config.tpl @@ -1100,6 +1094,4 @@ Flags: endef daal_dbg: - @echo "1" "!$(mklgpufpk.LIBS_A)!" - @echo "2" "!$(MKLGPUFPKDIR)!" - @echo "3" "!$(MKLGPUFPKROOT)!" + @echo "1" "!$(MKLDIR)!"