Skip to content

Commit

Permalink
ROCm: transpose_bin() vs transpose_bin_gpu() to fix transpose_32x32_bits_reversed_diagonale() link problems (issue #99)
Browse files Browse the repository at this point in the history
  • Loading branch information
stephanecharette committed Jan 14, 2025
1 parent 507b88c commit 8e37495
Show file tree
Hide file tree
Showing 2 changed files with 8 additions and 2 deletions.
4 changes: 4 additions & 0 deletions src-lib/convolutional_layer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -151,7 +151,11 @@ namespace

memset(*t_bit_input, 0, t_bit_input_size * sizeof(char));

#ifdef DARKNET_GPU
transpose_bin_gpu((uint8_t*)b, (uint8_t*)*t_bit_input, k, n, bit_align, new_ldb, 8);
#else
transpose_bin((uint32_t*)b, (uint32_t*)*t_bit_input, k, n, bit_align, new_ldb, 8);
#endif

return t_intput_size;
}
Expand Down
6 changes: 4 additions & 2 deletions src-lib/gemm.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -342,13 +342,13 @@ void transpose8rS32_reversed_diagonale(unsigned char* A, unsigned char* B, int m
B[3 * n] = reverse_byte(y >> 24); B[2 * n] = reverse_byte(y >> 16); B[1 * n] = reverse_byte(y >> 8); B[0 * n] = reverse_byte(y);
}

#endif // DARKNET_GPU

// transpose by 32-bit
void transpose_bin(uint32_t *A, uint32_t *B, const int n, const int m, const int lda, const int ldb, const int block_size)
{
TAT(TATPARMS);

/// @note This function is for CPU-only versions of Darknet. See im2col_kernels.cu for GPU version.

//printf("\n n = %d (n mod 32 = %d), m = %d (m mod 32 = %d) \n", n, n % 32, m, m % 32);
//printf("\n lda = %d (lda mod 32 = %d), ldb = %d (ldb mod 32 = %d) \n", lda, lda % 32, ldb, ldb % 32);

Expand All @@ -375,6 +375,8 @@ void transpose_bin(uint32_t *A, uint32_t *B, const int n, const int m, const int
}
}

#endif // DARKNET_GPU

#if (defined(__AVX__) && defined(__x86_64__)) || (defined(_WIN64) && !defined(__MINGW32__) && !defined(_M_ARM64))

#if (defined(_WIN64) && !defined(__MINGW64__))
Expand Down

0 comments on commit 8e37495

Please sign in to comment.