Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Remove MIOpenGEMM and MIOpenTensile leftovers #2499

Merged
merged 31 commits into from
Dec 19, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
31 commits
Select commit Hold shift + click to select a range
93da535
Remove MIOpenGEMM leftovers
averinevg Nov 2, 2023
593bd49
Fix formatting
averinevg Nov 2, 2023
283b2a1
Remove MIOpenTensile leftovers
averinevg Nov 2, 2023
d02a36e
Merge branch 'develop' into ea_remove_miopengemm_leftovers
averinevg Nov 2, 2023
d2b84e8
handle, kernel cache: remove is_kernel_str
averinevg Nov 2, 2023
d087774
Fix formatting
averinevg Nov 2, 2023
84b9701
Remove is_kernel_str
averinevg Nov 2, 2023
d62b888
Fix formatting
averinevg Nov 2, 2023
0199d2f
Fix fin
averinevg Nov 3, 2023
55aeede
Fix fin
averinevg Nov 3, 2023
9dfb14f
Revert "Fix fin"
averinevg Nov 3, 2023
2f89994
Revert "Fix fin"
averinevg Nov 3, 2023
364654b
Revert "Fix formatting"
averinevg Nov 3, 2023
b192b52
Revert "Remove is_kernel_str"
averinevg Nov 3, 2023
23c0fe7
Revert "Fix formatting"
averinevg Nov 3, 2023
e67645c
Revert "handle, kernel cache: remove is_kernel_str"
averinevg Nov 3, 2023
441e468
Revert changes
averinevg Nov 3, 2023
4cfdd37
Remove is_kernel_str
averinevg Nov 14, 2023
851a603
Fix formatting
averinevg Nov 14, 2023
441d491
Merge branch 'ea_remove_is_kernel_str' into ea_remove_miopengemm_left…
averinevg Nov 14, 2023
7f01cc9
Merge branch 'develop' into ea_remove_miopengemm_leftovers
averinevg Nov 14, 2023
ac863c8
Merge branch 'develop' into ea_remove_miopengemm_leftovers
averinevg Nov 17, 2023
f50f312
Merge branch 'develop' into ea_remove_miopengemm_leftovers
averinevg Nov 21, 2023
5e76345
Merge branch 'develop' into ea_remove_miopengemm_leftovers
averinevg Nov 23, 2023
3759e69
Merge branch 'develop' into ea_remove_miopengemm_leftovers
averinevg Nov 28, 2023
5c00dc7
Merge branch 'develop' into ea_remove_miopengemm_leftovers
averinevg Dec 1, 2023
c5be678
Merge branch 'develop' into ea_remove_miopengemm_leftovers
averinevg Dec 4, 2023
6454b03
Merge branch 'develop' into ea_remove_miopengemm_leftovers
averinevg Dec 13, 2023
65a46e3
Merge branch 'develop' into ea_remove_miopengemm_leftovers
averinevg Dec 14, 2023
befb51f
Merge branch 'develop' into ea_remove_miopengemm_leftovers
averinevg Dec 15, 2023
985fc7a
Merge branch 'develop' into ea_remove_miopengemm_leftovers
averinevg Dec 18, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 0 additions & 9 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -111,13 +111,4 @@ RUN pip3 install -r /doc-requirements.txt
# Composable Kernel requires this version cmake
RUN pip3 install --upgrade cmake==3.27.5

# Use parallel job to accelerate tensile build
# Workaround for Tensile with TargetID feature
ARG USE_TARGETID="OFF"
RUN if [ "$USE_TARGETID" = "ON" ] ; then export HIPCC_LINK_FLAGS_APPEND='-O3 -parallel-jobs=4' && export HIPCC_COMPILE_FLAGS_APPEND='-O3 -Wno-format-nonliteral -parallel-jobs=4' && rm -f /usr/bin/hipcc; fi

# install last released miopentensile in default (master), install latest commits when MIOTENSILE_VER="latest" (develop)
ARG MIOTENSILE_VER="default"
RUN if [ "$USE_TARGETID" = "OFF" ] ; then echo "MIOpenTensile is not installed."; elif [ "$MIOTENSILE_VER" = "latest" ] ; then cget -p $PREFIX install ROCmSoftwarePlatform/MIOpenTensile@94a9047741d16a8eccd290131b78fb1aa69cdcdf; else cget -p $PREFIX install ROCmSoftwarePlatform/MIOpenTensile@94a9047741d16a8eccd290131b78fb1aa69cdcdf; fi

RUN groupadd -f render
4 changes: 1 addition & 3 deletions Jenkinsfile
Original file line number Diff line number Diff line change
Expand Up @@ -192,10 +192,8 @@ def getDockerImage(Map conf=[:])
env.DOCKER_BUILDKIT=1
def prefixpath = conf.get("prefixpath", "/opt/rocm") // one image for each prefix 1: /usr/local 2:/opt/rocm
def gpu_arch = "gfx900;gfx906;gfx908;gfx90a;gfx940;gfx941;gfx942;gfx1030;gfx1100;gfx1101;gfx1102" // prebuilt dockers should have all the architectures enabled so one image can be used for all stages
def miotensile_version = conf.get("miotensile_version", "default") // deprecated
def target_id = conf.get("target_id", "OFF") // deprecated
def mlir_build = conf.get("mlir_build", "ON") // always ON
def dockerArgs = "--build-arg BUILDKIT_INLINE_CACHE=1 --build-arg PREFIX=${prefixpath} --build-arg GPU_ARCH='${gpu_arch}' --build-arg MIOTENSILE_VER='${miotensile_version}' --build-arg USE_TARGETID='${target_id}' --build-arg USE_MLIR='${mlir_build}' "
def dockerArgs = "--build-arg BUILDKIT_INLINE_CACHE=1 --build-arg PREFIX=${prefixpath} --build-arg GPU_ARCH='${gpu_arch}' --build-arg USE_MLIR='${mlir_build}' "
if(env.CCACHE_HOST)
{
def check_host = sh(script:"""(printf "PING\r\n";) | nc -N ${env.CCACHE_HOST} 6379 """, returnStdout: true).trim()
Expand Down
6 changes: 0 additions & 6 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -33,17 +33,13 @@ python3 -m sphinx -T -E -b html -d _build/doctrees -D language=en . _build/html
* HIP -
* HIP and HCC libraries and header files.
* OpenCL - OpenCL libraries and header files.
* [MIOpenGEMM](https://github.com/ROCmSoftwarePlatform/MIOpenGEMM) - enable various functionalities including transposed and dilated convolutions.
* This is optional on the HIP backend, and required on the OpenCL backend.
* Users can enable this library using the cmake configuration flag `-DMIOPEN_USE_MIOPENGEMM=On`, which is enabled by default when OpenCL backend is chosen.
* [ROCm cmake](https://github.com/RadeonOpenCompute/rocm-cmake) - provide cmake modules for common build tasks needed for the ROCM software stack.
* [Half](http://half.sourceforge.net/) - IEEE 754-based half-precision floating point library
* [Boost](http://www.boost.org/)
* MIOpen uses `boost-system` and `boost-filesystem` packages to enable persistent [kernel cache](https://rocm.docs.amd.com/projects/MIOpen/en/latest/cache.html)
* Version 1.79 is recommended, older version may need patches to work on newer systems, e.g. boost1{69,70,72} w/glibc-2.34
* [SQLite3](https://sqlite.org/index.html) - reading and writing performance database
* lbzip2 - multi-threaded compress or decompress utility
* [MIOpenTENSILE](https://github.com/ROCmSoftwarePlatform/MIOpenTensile) - users can enable this library using the cmake configuration flag`-DMIOPEN_USE_MIOPENTENSILE=On`. (deprecated after ROCm 5.1.1)
* [rocBLAS](https://github.com/ROCm/rocBLAS) - AMD library for Basic Linear Algebra Subprograms (BLAS) on the ROCm platform.
* Minimum version branch for pre-ROCm 3.5 [master-rocm-2.10](https://github.com/ROCm/rocBLAS/tree/master-rocm-2.10)
* Minimum version branch for post-ROCm 3.5 [master-rocm-3.5](https://github.com/ROCm/rocBLAS/releases/tag/rocm-3.5.0)
Expand Down Expand Up @@ -106,8 +102,6 @@ This prefix can used to specify the dependency path during the configuration pha

* MIOpen's HIP backend uses [rocBLAS](https://github.com/ROCm/rocBLAS) by default. Users can install rocBLAS minimum release by using `apt-get install rocblas`. To disable using rocBLAS set the configuration flag `-DMIOPEN_USE_ROCBLAS=Off`. rocBLAS is *not* available for the OpenCL backend.

* MIOpen's OpenCL backend uses [MIOpenGEMM](https://github.com/ROCmSoftwarePlatform/MIOpenGEMM) by default. Users can install MIOpenGEMM minimum release by using `apt-get install miopengemm`.

## Building MIOpen from source

### Configuring with cmake
Expand Down
3 changes: 1 addition & 2 deletions docs/DebugAndLogging.md
Original file line number Diff line number Diff line change
Expand Up @@ -168,8 +168,7 @@ The `ROCBLAS_LAYER` environmental variable can be set to output GEMM information
* `ROCBLAS_LAYER=2` - is set to 2, then there is bench logging
* `ROCBLAS_LAYER=3` - is set to 3, then there is both trace and bench logging

Additionally, using environment variable "MIOPEN_GEMM_ENFORCE_BACKEND", can override the default behavior. The default behavior which is to use
both MIOpenGEMM and rocBlas depending on the input configuration:
Additionally, the environment variable "MIOPEN_GEMM_ENFORCE_BACKEND" can be set to override default GEMM backend (Default GEMM backend is rocBLAS):

* `MIOPEN_GEMM_ENFORCE_BACKEND=1`, use rocBLAS if enabled
* `MIOPEN_GEMM_ENFORCE_BACKEND=2`, reserved
Expand Down
5 changes: 0 additions & 5 deletions docs/install.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,6 @@
* HIP -
* HIP and HCC libraries and header files.
* OpenCL - OpenCL libraries and header files.
* [MIOpenGEMM](https://github.com/ROCmSoftwarePlatform/MIOpenGEMM) - enable various functionalities including transposed and dilated convolutions.
* This is optional on the HIP backend, and required on the OpenCL backend.
* Users can enable this library using the cmake configuration flag `-DMIOPEN_USE_MIOPENGEMM=On`, which is enabled by default when OpenCL backend is chosen.
* [ROCm cmake](https://github.com/RadeonOpenCompute/rocm-cmake) - provide cmake modules for common build tasks needed for the ROCM software stack.
* [Half](http://half.sourceforge.net/) - IEEE 754-based half-precision floating point library
* [Boost](http://www.boost.org/)
Expand Down Expand Up @@ -72,5 +69,3 @@ cmake -P install_deps.cmake --minimum --prefix /root/MIOpen/install_dir
This prefix can used to specify the dependency path during the configuration phase using the `CMAKE_PREFIX_PATH`.

* MIOpen's HIP backend uses [rocBLAS](https://github.com/ROCm/rocBLAS) by default. Users can install rocBLAS minimum release by using `apt-get install rocblas`. To disable using rocBLAS set the configuration flag `-DMIOPEN_USE_ROCBLAS=Off`. rocBLAS is *not* available for the OpenCL backend.

* MIOpen's OpenCL backend uses [MIOpenGEMM](https://github.com/ROCm/MIOpenGEMM) by default. Users can install MIOpenGEMM minimum release by using `apt-get install miopengemm`.
4 changes: 1 addition & 3 deletions include/miopen/config.h.in
Original file line number Diff line number Diff line change
Expand Up @@ -29,8 +29,6 @@
#cmakedefine01 MIOPEN_BACKEND_OPENCL
#cmakedefine01 MIOPEN_BACKEND_HIP
#cmakedefine01 MIOPEN_MODE_NOGPU
#cmakedefine01 MIOPEN_USE_MIOPENTENSILE
#cmakedefine01 MIOPEN_USE_MIOPENGEMM
#cmakedefine01 MIOPEN_USE_ROCBLAS
#cmakedefine01 MIOPEN_BUILD_DEV
#cmakedefine01 MIOPEN_GPU_SYNC
Expand Down Expand Up @@ -86,7 +84,7 @@
#cmakedefine MIOPEN_OFFLOADBUNDLER_BIN "@MIOPEN_OFFLOADBUNDLER_BIN@"
#cmakedefine MIOPEN_CACHE_DIR "@MIOPEN_CACHE_DIR@"

#define MIOPEN_USE_GEMM (MIOPEN_USE_MIOPENTENSILE || MIOPEN_USE_MIOPENGEMM || MIOPEN_USE_ROCBLAS)
#define MIOPEN_USE_GEMM (MIOPEN_USE_ROCBLAS)

// Usage of "defined" operator within macro expansion is undefined behavior,
// so "defined(NDEBUG)" cannot be used there... unlike the following macro:
Expand Down
1 change: 0 additions & 1 deletion src/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -237,7 +237,6 @@ set( MIOpen_Source
solver/fft.cpp
solver/gemm.cpp
solver/gemm_bwd.cpp
solver/gemm_common.cpp
solver/gemm_wrw.cpp
solver/norm/forward_layernorm.cpp
solver/norm/forward_layernorm2d_ck.cpp
Expand Down
1 change: 0 additions & 1 deletion src/anyramdb.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,6 @@

#include <miopen/errors.hpp>
#include <miopen/logger.hpp>
#include <miopen/md5.hpp>

#include <boost/filesystem/operations.hpp>
#include <boost/filesystem.hpp>
Expand Down
49 changes: 15 additions & 34 deletions src/binary_cache.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -145,52 +145,37 @@ KDb GetDb(const TargetProperties& target, size_t num_cu)
}
#endif

boost::filesystem::path GetCacheFile(const std::string& device,
const std::string& name,
const std::string& args,
bool is_kernel_str)
boost::filesystem::path
GetCacheFile(const std::string& device, const std::string& name, const std::string& args)
{
const std::string filename = (is_kernel_str ? miopen::md5(name) : name) + ".o";
const std::string filename = name + ".o";
return GetCachePath(false) / miopen::md5(device + ":" + args) / filename;
}

#if MIOPEN_ENABLE_SQLITE_KERN_CACHE
static inline std::string GetFilenameForInfo2Logging(const bool is_kernel_str,
const std::string& filename,
const std::string& name)
{
if(!miopen::IsLogging(miopen::LoggingLevel::Info2))
return {}; // Used only in MIOPEN_LOG_I2 -- optimize for speed.
if(is_kernel_str)
return filename + " size=" + std::to_string(name.size());
return filename;
}

std::string LoadBinary(const TargetProperties& target,
const size_t num_cu,
const std::string& name,
const std::string& args,
bool is_kernel_str)
const std::string& args)
{
if(miopen::IsCacheDisabled())
return {};

auto db = GetDb(target, num_cu);

const std::string filename = (is_kernel_str ? miopen::md5(name) : name) + ".o";
const std::string filename = name + ".o";
const KernelConfig cfg{filename, args, ""};

const auto verbose_name = GetFilenameForInfo2Logging(is_kernel_str, filename, name);
MIOPEN_LOG_I2("Loading binary for: " << verbose_name << "; args: " << args);
MIOPEN_LOG_I2("Loading binary for: " << filename << "; args: " << args);
auto record = db.FindRecord(cfg);
if(record)
{
MIOPEN_LOG_I2("Successfully loaded binary for: " << verbose_name << "; args: " << args);
MIOPEN_LOG_I2("Successfully loaded binary for: " << filename << "; args: " << args);
return record.get();
}
else
{
MIOPEN_LOG_I2("Unable to load binary for: " << verbose_name << "; args: " << args);
MIOPEN_LOG_I2("Unable to load binary for: " << filename << "; args: " << args);
return {};
}
}
Expand All @@ -199,33 +184,30 @@ void SaveBinary(const std::string& hsaco,
const TargetProperties& target,
const std::size_t num_cu,
const std::string& name,
const std::string& args,
bool is_kernel_str)
const std::string& args)
{
if(miopen::IsCacheDisabled())
return;

auto db = GetDb(target, num_cu);

const std::string filename = (is_kernel_str ? miopen::md5(name) : name) + ".o";
const std::string filename = name + ".o";
KernelConfig cfg{filename, args, hsaco};

const auto verbose_name = GetFilenameForInfo2Logging(is_kernel_str, filename, name);
MIOPEN_LOG_I2("Saving binary for: " << verbose_name << "; args: " << args);
MIOPEN_LOG_I2("Saving binary for: " << filename << "; args: " << args);
db.StoreRecord(cfg);
}
#else
boost::filesystem::path LoadBinary(const TargetProperties& target,
const size_t num_cu,
const std::string& name,
const std::string& args,
bool is_kernel_str)
const std::string& args)
{
if(miopen::IsCacheDisabled())
return {};

(void)num_cu;
auto f = GetCacheFile(target.DbId(), name, args, is_kernel_str);
auto f = GetCacheFile(target.DbId(), name, args);
if(boost::filesystem::exists(f))
{
return f.string();
Expand All @@ -239,16 +221,15 @@ boost::filesystem::path LoadBinary(const TargetProperties& target,
void SaveBinary(const boost::filesystem::path& binary_path,
const TargetProperties& target,
const std::string& name,
const std::string& args,
bool is_kernel_str)
const std::string& args)
{
if(miopen::IsCacheDisabled())
{
boost::filesystem::remove(binary_path);
}
else
{
auto p = GetCacheFile(target.DbId(), name, args, is_kernel_str);
auto p = GetCacheFile(target.DbId(), name, args);
boost::filesystem::create_directories(p.parent_path());
boost::filesystem::rename(binary_path, p);
}
Expand Down
1 change: 0 additions & 1 deletion src/db.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,6 @@
#include <miopen/errors.hpp>
#include <miopen/lock_file.hpp>
#include <miopen/logger.hpp>
#include <miopen/md5.hpp>

#include <boost/date_time/posix_time/posix_time_types.hpp>
#include <boost/filesystem.hpp>
Expand Down
9 changes: 3 additions & 6 deletions src/gemm_v2.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -643,8 +643,7 @@ miopenStatus_t CallGemm(const Handle& handle,
break;

case miopenDouble: {
MIOPEN_THROW(miopenStatusBadParm,
averinevg marked this conversation as resolved.
Show resolved Hide resolved
"miopenDouble data type not supported by MIOpenGEMM.");
MIOPEN_THROW(miopenStatusBadParm, "miopenDouble data type not supported by rocBLAS.");
};
break;
}
Expand Down Expand Up @@ -918,8 +917,7 @@ miopenStatus_t CallGemmStridedBatched(const Handle& handle,
}

case miopenDouble: {
MIOPEN_THROW(miopenStatusBadParm,
"miopenDouble data type not supported by MIOpenGEMM.");
MIOPEN_THROW(miopenStatusBadParm, "miopenDouble data type not supported by rocBLAS.");
}
break;
}
Expand Down Expand Up @@ -1191,8 +1189,7 @@ miopenStatus_t CallGemmStridedBatchedSequential(const Handle& handle,
}

case miopenDouble: {
MIOPEN_THROW(miopenStatusBadParm,
"miopenDouble data type not supported by MIOpenGEMM.");
MIOPEN_THROW(miopenStatusBadParm, "miopenDouble data type not supported by rocBLAS.");
}
break;
}
Expand Down
25 changes: 7 additions & 18 deletions src/hip/handlehip.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,6 @@
#include <miopen/binary_cache.hpp>
#include <miopen/env.hpp>
#include <miopen/errors.hpp>
#include <miopen/gemm_geometry.hpp>
#include <miopen/handle_lock.hpp>
#include <miopen/invoker.hpp>
#include <miopen/kernel_cache.hpp>
Expand Down Expand Up @@ -441,10 +440,8 @@ KernelInvoke Handle::AddKernel(const std::string& algorithm,
const std::vector<size_t>& vgd,
const std::string& params,
std::size_t cache_index,
bool is_kernel_str,
const std::string& kernel_src) const
{

auto obj = this->impl->cache.AddKernel(*this,
algorithm,
network_config,
Expand All @@ -454,7 +451,6 @@ KernelInvoke Handle::AddKernel(const std::string& algorithm,
vgd,
params,
cache_index,
is_kernel_str,
kernel_src);
return this->Run(obj);
}
Expand Down Expand Up @@ -502,7 +498,6 @@ KernelInvoke Handle::Run(Kernel k) const

Program Handle::LoadProgram(const std::string& program_name,
std::string params,
bool is_kernel_str,
const std::string& kernel_src) const
{
this->impl->set_ctx();
Expand All @@ -513,11 +508,8 @@ Program Handle::LoadProgram(const std::string& program_name,
if(!miopen::EndsWith(program_name, ".mlir"))
params = params + " -mcpu=" + this->GetTargetProperties().Name();

auto hsaco = miopen::LoadBinary(this->GetTargetProperties(),
this->GetMaxComputeUnits(),
program_name,
params,
is_kernel_str);
auto hsaco = miopen::LoadBinary(
this->GetTargetProperties(), this->GetMaxComputeUnits(), program_name, params);
if(hsaco.empty())
{
const auto arch_target_id = miopen::SplitDelim(arch_name, ':');
Expand All @@ -528,8 +520,7 @@ Program Handle::LoadProgram(const std::string& program_name,
hsaco = miopen::LoadBinary(this->GetTargetProperties(),
this->GetMaxComputeUnits(),
program_name,
orig_params + " -mcpu=" + base_arch,
is_kernel_str);
orig_params + " -mcpu=" + base_arch);
}
}

Expand All @@ -538,9 +529,8 @@ Program Handle::LoadProgram(const std::string& program_name,
if(hsaco.empty())
{
CompileTimer ct;
auto p = HIPOCProgram{
program_name, params, is_kernel_str, this->GetTargetProperties(), kernel_src};
ct.Log("Kernel", is_kernel_str ? std::string() : program_name);
auto p = HIPOCProgram{program_name, params, this->GetTargetProperties(), kernel_src};
ct.Log("Kernel", program_name);

// Save to cache
#if MIOPEN_ENABLE_SQLITE_KERN_CACHE
Expand All @@ -550,15 +540,14 @@ Program Handle::LoadProgram(const std::string& program_name,
this->GetTargetProperties(),
this->GetMaxComputeUnits(),
program_name,
params,
is_kernel_str);
params);
#else
auto path = miopen::GetCachePath(false) / boost::filesystem::unique_path();
if(p.IsCodeObjectInMemory())
miopen::WriteFile(p.GetCodeObjectBlob(), path);
else
boost::filesystem::copy_file(p.GetCodeObjectPathname(), path);
miopen::SaveBinary(path, this->GetTargetProperties(), program_name, params, is_kernel_str);
miopen::SaveBinary(path, this->GetTargetProperties(), program_name, params);
#endif
p.FreeCodeObjectFileStorage();
return p;
Expand Down
Loading
Loading