Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix for clang versions >= 5 #16

Open
wants to merge 18 commits into
base: master
Choose a base branch
from
147 changes: 111 additions & 36 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,30 +1,51 @@

cmake_minimum_required(VERSION 3.7 FATAL_ERROR)
project (HPTT C CXX)
cmake_minimum_required(VERSION 3.18 FATAL_ERROR)
project (HPTT VERSION 1.0.0 LANGUAGES C CXX)

set(CMAKE_CXX_STANDARD 11)
set(CMAKE_CXX_STANDARD 17)
set(CMAKE_CXX_STANDARD_REQUIRED ON)
set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
set(ENABLE_IBM OFF)
set(CMAKE_NO_SYSTEM_FROM_IMPORTED ON)
set(CMAKE_POSITION_INDEPENDENT_CODE ON)
set(CMAKE_FIND_PACKAGE_NO_PACKAGE_REGISTRY ON)

# User-facing switch for the IBM POWER (AltiVec) code path. option() takes the
# help text as its second argument and the default as the third; the default
# stays OFF. It is switched ON automatically below when the target processor
# is ppc64le.
option(ENABLE_IBM "Build the IBM POWER (AltiVec) code path" OFF)

if(NOT CMAKE_BUILD_TYPE)
set (CMAKE_BUILD_TYPE Release)
endif()

include(CheckCXXCompilerFlag)

if(CMAKE_SYSTEM_PROCESSOR STREQUAL "ppc64le")
set(ENABLE_IBM ON)
endif()

if(CMAKE_CXX_COMPILER_ID STREQUAL "Intel")
set(HPTT_CXX_FLAGS ${HPTT_CXX_FLAGS} -qopenmp -xhost)
elseif(CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
if(ENABLE_IBM)
set(HPTT_CXX_FLAGS ${HPTT_CXX_FLAGS} -fopenmp)
else()
set(HPTT_CXX_FLAGS ${HPTT_CXX_FLAGS} -fopenmp -march=native -mtune=native)

if(DEFINED MARCH_FLAGS)
set(HPTT_ARCH_FLAGS ${MARCH_FLAGS})
else()
if(CMAKE_CXX_COMPILER_ID STREQUAL "Intel")
set(HPTT_ARCH_FLAGS -xhost)
elseif(CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
if(NOT ENABLE_IBM)
set(HPTT_ARCH_FLAGS -march=native) # -mtune=native
endif()
elseif(CMAKE_CXX_COMPILER_ID STREQUAL "Clang")
set(HPTT_ARCH_FLAGS -march=native)
elseif(CMAKE_CXX_COMPILER_ID STREQUAL "PGI")
set(HPTT_ARCH_FLAGS -silent -w -Mnovect)
# elseif(CMAKE_CXX_COMPILER_ID STREQUAL "XL")
# set(HPTT_ARCH_FLAGS -qsmp=omp)
#elseif(CMAKE_CXX_COMPILER_ID STREQUAL "ARMClang")
elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "aarch64")
set(HPTT_ARCH_FLAGS -mcpu=native)
endif()
elseif(CMAKE_CXX_COMPILER_ID STREQUAL "Clang")
set(HPTT_CXX_FLAGS ${HPTT_CXX_FLAGS} -fopenmp -march=native)
elseif(CMAKE_CXX_COMPILER_ID STREQUAL "PGI")
set(HPTT_CXX_FLAGS ${HPTT_CXX_FLAGS} -silent -w -Mnovect)
elseif(CMAKE_CXX_COMPILER_ID STREQUAL "XL")
set(HPTT_CXX_FLAGS ${HPTT_CXX_FLAGS} -qsmp=omp)
endif()

check_cxx_compiler_flag("${HPTT_ARCH_FLAGS}" __COMPILER_SUPPORTS_MARCH)
if(__COMPILER_SUPPORTS_MARCH)
set(HPTT_CXX_FLAGS "${HPTT_ARCH_FLAGS}")
endif()

if(ENABLE_AVX)
Expand All @@ -35,26 +56,80 @@ elseif(ENABLE_IBM)
set(HPTT_CXX_FLAGS ${HPTT_CXX_FLAGS} -mtune=native -DHPTT_ARCH_IBM -maltivec -mabi=altivec)
endif()

if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU" AND CMAKE_CXX_COMPILER_VERSION VERSION_GREATER_EQUAL "8.2")
set(HPTT_CXX_FLAGS ${HPTT_CXX_FLAGS} -O2)
endif()

set(HPTT_SRCS src/hptt.cpp src/plan.cpp src/transpose.cpp src/utils.cpp)

add_library(hptt STATIC ${HPTT_SRCS})
target_compile_features(hptt PUBLIC cxx_std_11)
target_include_directories(hptt PUBLIC ${PROJECT_SOURCE_DIR}/include)
#target_compile_definitions(hptt PRIVATE ${HPTT_CXX_COMPILE_DEFS})
target_compile_options(hptt PUBLIC ${HPTT_CXX_FLAGS})
add_library(hptt ${HPTT_SRCS})

add_library(hptt::hptt ALIAS hptt)

# GNUInstallDirs defines CMAKE_INSTALL_INCLUDEDIR. It has to be loaded before
# the variable is expanded in the INSTALL_INTERFACE expression below; otherwise
# the variable is still undefined on a fresh configure and the exported target
# ends up with an empty include directory. Including the module again later in
# the file is harmless.
include(GNUInstallDirs)

# Public headers: taken from the source tree while building, and from the
# install prefix once the package has been installed.
# NOTE(review): the headers themselves are installed into
# ${CMAKE_INSTALL_INCLUDEDIR}/hptt by the install(DIRECTORY ...) rule -- confirm
# that consumers are expected to write `#include <hptt/...>`.
target_include_directories(hptt
  PUBLIC
    $<INSTALL_INTERFACE:${CMAKE_INSTALL_INCLUDEDIR}>
    $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/include>
)

target_compile_options(hptt PRIVATE ${HPTT_CXX_FLAGS})

if(ENABLE_OPENMP)
find_package(OpenMP REQUIRED)
target_link_libraries(hptt PUBLIC OpenMP::OpenMP_CXX)
endif()

set_target_properties(hptt PROPERTIES EXPORT_NAME hptt)

# Install

include(GNUInstallDirs)
set(INSTALL_CONFIGDIR ${CMAKE_INSTALL_LIBDIR}/cmake/hptt)

install(TARGETS hptt
LIBRARY DESTINATION ${CMAKE_INSTALL_PREFIX}/lib
ARCHIVE DESTINATION ${CMAKE_INSTALL_PREFIX}/lib)

set(HPTT_INCLUDES
include/compute_node.h
include/hptt_types.h
include/hptt.h
include/macros.h
include/plan.h
include/utils.h
include/transpose.h)

install(FILES ${HPTT_INCLUDES}
DESTINATION ${CMAKE_INSTALL_PREFIX}/include)
EXPORT hptt-targets
LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR}
ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR}
)


install(DIRECTORY include/ DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/hptt)

#Export the targets to a script
install(EXPORT hptt-targets
FILE
hptt-targets.cmake
NAMESPACE
hptt::
DESTINATION
${INSTALL_CONFIGDIR}
)

#Create a ConfigVersion.cmake file
include(CMakePackageConfigHelpers)
write_basic_package_version_file(
${CMAKE_CURRENT_BINARY_DIR}/hptt-config-version.cmake
VERSION ${PROJECT_VERSION}
COMPATIBILITY AnyNewerVersion
)

configure_package_config_file(${CMAKE_CURRENT_LIST_DIR}/cmake/hptt-config.cmake.in
${CMAKE_CURRENT_BINARY_DIR}/hptt-config.cmake
INSTALL_DESTINATION ${INSTALL_CONFIGDIR}
PATH_VARS CMAKE_INSTALL_INCLUDEDIR
)

#Install the config, configversion and custom find modules
install(FILES
${CMAKE_CURRENT_BINARY_DIR}/hptt-config.cmake
${CMAKE_CURRENT_BINARY_DIR}/hptt-config-version.cmake
DESTINATION ${INSTALL_CONFIGDIR}
)


export(EXPORT hptt-targets
FILE ${CMAKE_CURRENT_BINARY_DIR}/hptt-targets.cmake
NAMESPACE hptt::)

#Register package in user's package registry
export(PACKAGE hptt)
18 changes: 18 additions & 0 deletions cmake/hptt-config.cmake.in
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@

# Package configuration file for hptt, consumed via find_package(hptt).
# It is a template: the @...@ placeholders are substituted by
# configure_package_config_file() when hptt itself is configured.

@PACKAGE_INIT@

# Records the ENABLE_OPENMP value hptt was configured with.
set(ENABLE_OPENMP @ENABLE_OPENMP@)

if(ENABLE_OPENMP)
  # hptt links OpenMP::OpenMP_CXX publicly when ENABLE_OPENMP is on, so the
  # consumer needs that target as well. find_dependency() forwards the
  # REQUIRED/QUIET arguments of the calling find_package(hptt ...) and, when
  # OpenMP is missing, reports hptt as not found instead of unconditionally
  # aborting the consumer's configure step.
  include(CMakeFindDependencyMacro)
  find_dependency(OpenMP)
endif()

# Import the exported targets only once, even if the package is found
# repeatedly within the same project.
if(NOT TARGET hptt::hptt)
  include("${CMAKE_CURRENT_LIST_DIR}/hptt-targets.cmake")
endif()

# Variable-style results for consumers that do not link hptt::hptt directly.
set(HPTT_FOUND TRUE)
set(HPTT_LIBRARIES hptt::hptt)
set(HPTT_INCLUDE_DIRS "@PACKAGE_CMAKE_INSTALL_INCLUDEDIR@")

10 changes: 5 additions & 5 deletions include/compute_node.h
Original file line number Diff line number Diff line change
Expand Up @@ -15,11 +15,11 @@ class ComputeNode
delete next;
}

size_t start; //!< start index for at the current loop
size_t end; //!< end index for at the current loop
size_t inc; //!< increment for at the current loop
size_t lda; //!< stride of A w.r.t. the loop index
size_t ldb; //!< stride of B w.r.t. the loop index
int start; //!< start index of the current loop
int end; //!< end index of the current loop
int inc; //!< increment of the current loop
int lda; //!< stride of A w.r.t. the loop index
int ldb; //!< stride of B w.r.t. the loop index
ComputeNode *next; //!< next ComputeNode, this might be another loop or 'nullptr' (i.e., indicating that the macro-kernel should be called)
};

Expand Down
1 change: 0 additions & 1 deletion include/hptt_types.h
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
#pragma once

#include <complex>
#include <complex.h>

#define REGISTER_BITS 256 // AVX
#ifdef HPTT_ARCH_ARM
Expand Down
2 changes: 1 addition & 1 deletion include/plan.h
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ class ComputeNode;
class Plan
{
public:
Plan() : rootNodes_(nullptr), numTasks_(0) { }
Plan() : numTasks_(0), rootNodes_(nullptr) { }

Plan(std::vector<int>loopOrder, std::vector<int>numThreadsAtLoop);

Expand Down
10 changes: 5 additions & 5 deletions include/transpose.h
Original file line number Diff line number Diff line change
Expand Up @@ -252,12 +252,12 @@ class Transpose
floatType alpha_; //!< scaling factor for A
floatType beta_; //!< scaling factor for B
int dim_; //!< dimension of the tensor
std::vector<size_t> sizeA_; //!< size of A
std::vector<int> sizeA_; //!< size of A
std::vector<int> perm_; //!< permutation
std::vector<size_t> outerSizeA_; //!< outer sizes of A
std::vector<size_t> outerSizeB_; //!< outer sizes of B
std::vector<size_t> lda_; //!< strides for all dimensions of A (first dimension has a stride of 1)
std::vector<size_t> ldb_; //!< strides for all dimensions of B (first dimension has a stride of 1)
std::vector<int> outerSizeA_; //!< outer sizes of A
std::vector<int> outerSizeB_; //!< outer sizes of B
std::vector<int> lda_; //!< strides for all dimensions of A (first dimension has a stride of 1)
std::vector<int> ldb_; //!< strides for all dimensions of B (first dimension has a stride of 1)
std::vector<int> threadIds_; //!< OpenMP threadIds of the threads involved in the transposition
int numThreads_;
int selectedParallelStrategyId_;
Expand Down
3 changes: 2 additions & 1 deletion include/utils.h
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ template<typename floatType>
static floatType conj(floatType x){
return std::conj(x);
}

template<>
float conj(float x){
return x;
Expand Down Expand Up @@ -66,7 +67,7 @@ void getPrimeFactors( int n, std::list<int> &primeFactors );
template<typename t>
int findPos(t value, const std::vector<t> &array)
{
for(int i=0;i < array.size() ; ++i)
for(size_t i = 0; i < array.size(); ++i)
if( array[i] == value )
return i;
return -1;
Expand Down
8 changes: 6 additions & 2 deletions src/hptt.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -180,8 +180,10 @@ void cTensorTranspose( const int *perm, const int dim,
const float _Complex beta, float _Complex *B, const int *outerSizeB,
const int numThreads, const int useRowMajor)
{
const hptt::FloatComplex* calpha = reinterpret_cast<const hptt::FloatComplex*>(&alpha);
const hptt::FloatComplex* cbeta = reinterpret_cast<const hptt::FloatComplex*>(&beta);
auto plan(std::make_shared<hptt::Transpose<hptt::FloatComplex> >(sizeA, perm, outerSizeA, outerSizeB, dim,
(const hptt::FloatComplex*) A, (hptt::FloatComplex) alpha, (hptt::FloatComplex*) B, (hptt::FloatComplex) beta, hptt::ESTIMATE, numThreads, nullptr, useRowMajor));
(const hptt::FloatComplex*) A, *calpha, (hptt::FloatComplex*) B, *cbeta, hptt::ESTIMATE, numThreads, nullptr, useRowMajor));
plan->setConjA(conjA);
plan->execute();
}
Expand All @@ -191,8 +193,10 @@ void zTensorTranspose( const int *perm, const int dim,
const double _Complex beta, double _Complex *B, const int *outerSizeB,
const int numThreads, const int useRowMajor)
{
const hptt::DoubleComplex* calpha = reinterpret_cast<const hptt::DoubleComplex*>(&alpha);
const hptt::DoubleComplex* cbeta = reinterpret_cast<const hptt::DoubleComplex*>(&beta);
auto plan(std::make_shared<hptt::Transpose<hptt::DoubleComplex> >(sizeA, perm, outerSizeA, outerSizeB, dim,
(const hptt::DoubleComplex*) A, (hptt::DoubleComplex) alpha, (hptt::DoubleComplex*) B, (hptt::DoubleComplex) beta, hptt::ESTIMATE, numThreads, nullptr, useRowMajor));
(const hptt::DoubleComplex*) A, *calpha, (hptt::DoubleComplex*) B, *cbeta, hptt::ESTIMATE, numThreads, nullptr, useRowMajor));
plan->setConjA(conjA);
plan->execute();
}
Expand Down
2 changes: 1 addition & 1 deletion src/plan.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@

namespace hptt {

Plan::Plan(std::vector<int>loopOrder, std::vector<int>numThreadsAtLoop) : rootNodes_(nullptr), loopOrder_(loopOrder), numThreadsAtLoop_(numThreadsAtLoop) {
Plan::Plan(std::vector<int>loopOrder, std::vector<int>numThreadsAtLoop) : loopOrder_(loopOrder), numThreadsAtLoop_(numThreadsAtLoop), rootNodes_(nullptr) {
numTasks_ = 1;
for(auto nt : numThreadsAtLoop)
numTasks_ *= nt;
Expand Down
Loading