From ade2876ce4373a94b42c87b031accee42b5303d2 Mon Sep 17 00:00:00 2001
From: kth5
Date: Sun, 17 Mar 2024 10:09:20 +0100
Subject: [PATCH] * add rocblas

---
Note (ignored by git-am): adds packaging for rocBLAS 6.0.2. The PKGBUILD
downloads the rocBLAS and Tensile release tarballs for the same ROCm tag and
applies find-msgpack-5.patch inside the Tensile tree during prepare().
test.cpp and test.sh are shipped next to the PKGBUILD.

 rocblas/PKGBUILD             | 60 +++++++++++++++++++++++++++++++
 rocblas/find-msgpack-5.patch | 11 ++++++
 rocblas/test.cpp             | 70 ++++++++++++++++++++++++++++++++++++
 rocblas/test.sh              |  5 +++
 4 files changed, 146 insertions(+)
 create mode 100644 rocblas/PKGBUILD
 create mode 100644 rocblas/find-msgpack-5.patch
 create mode 100644 rocblas/test.cpp
 create mode 100755 rocblas/test.sh

diff --git a/rocblas/PKGBUILD b/rocblas/PKGBUILD
new file mode 100644
index 0000000000..b2e95ced77
--- /dev/null
+++ b/rocblas/PKGBUILD
@@ -0,0 +1,60 @@
+# POWER Maintainer: Alexander Baldeck
+# Maintainer: Torsten Keßler
+# Contributor: Markus Näther
+pkgname=rocblas
+pkgver=6.0.2
+pkgrel=1
+pkgdesc='Next generation BLAS implementation for ROCm platform'
+arch=(x86_64 powerpc64le powerpc64 riscv64)
+url='https://rocblas.readthedocs.io/en/latest'
+license=('MIT')
+depends=('rocm-core' 'hip' 'glibc' 'gcc-libs' 'openmp')
+makedepends=('rocm-cmake' 'python' 'python-virtualenv' 'python-pyaml' 'python-wheel'
+             'python-msgpack' 'python-joblib' 'perl-file-which' 'msgpack-cxx' 'gcc-fortran')
+_rocblas='https://github.com/ROCmSoftwarePlatform/rocBLAS'
+_tensile='https://github.com/ROCmSoftwarePlatform/Tensile'
+source=("$pkgname-$pkgver.tar.gz::$_rocblas/archive/rocm-$pkgver.tar.gz"
+        "$pkgname-tensile-$pkgver.tar.gz::$_tensile/archive/refs/tags/rocm-$pkgver.tar.gz"
+        "find-msgpack-5.patch")
+sha256sums=('d1bf31063a2d349797b88c994c91d05f94e681bafb5550ad9b53529703d89dbb'
+            '1d8a92422560c1e908fa25fd97a4aa07a96659528a543f77618408ffcfe1f307'
+            '3f91bf087e4ea72eaef5acd500e16b61aa69c029cfcca14666799a7c42a0c5aa')
+options=(!lto)
+_dirname="$(basename "$_rocblas")-$(basename "${source[0]}" ".tar.gz")"  # evaluates to rocBLAS-rocm-$pkgver
+_tensile_dir="$(basename "$_tensile")-$(basename "${source[1]}" ".tar.gz")"  # evaluates to Tensile-rocm-$pkgver
+
+prepare() {
+  cd "$_tensile_dir"
+  patch -Np1 -i "$srcdir/find-msgpack-5.patch"
+}
+
+build() {
+  # Compile source code for supported GPU archs in parallel
+  export HIPCC_COMPILE_FLAGS_APPEND="-parallel-jobs=$(nproc)"
+  export HIPCC_LINK_FLAGS_APPEND="-parallel-jobs=$(nproc)"
+  # -fcf-protection is not supported by HIP, see
+  # https://rocm.docs.amd.com/en/latest/reference/rocmcc.html#support-status-of-other-clang-options
+  local cmake_args=(
+    -Wno-dev
+    -S "$_dirname"
+    -B build
+    -D CMAKE_BUILD_TYPE=None
+    -D CMAKE_CXX_COMPILER=/opt/rocm/bin/hipcc
+    -D CMAKE_TOOLCHAIN_FILE=toolchain-linux.cmake
+    -D CMAKE_CXX_FLAGS="${CXXFLAGS} -fcf-protection=none"
+    -D CMAKE_INSTALL_PREFIX=/opt/rocm
+    -D CMAKE_PREFIX_PATH=/opt/rocm/llvm/lib/cmake/llvm
+    -D amd_comgr_DIR=/opt/rocm/lib/cmake/amd_comgr
+    -D BUILD_WITH_TENSILE=ON
+    -D Tensile_LIBRARY_FORMAT=msgpack
+    -D Tensile_TEST_LOCAL_PATH="$srcdir/$_tensile_dir"
+  )
+  cmake "${cmake_args[@]}"
+  cmake --build build
+}
+
+package() {
+  DESTDIR="$pkgdir" cmake --install build
+
+  install -Dm644 "$_dirname/LICENSE.md" "$pkgdir/usr/share/licenses/$pkgname/LICENSE"
+}
diff --git a/rocblas/find-msgpack-5.patch b/rocblas/find-msgpack-5.patch
new file mode 100644
index 0000000000..37b7f82235
--- /dev/null
+++ b/rocblas/find-msgpack-5.patch
@@ -0,0 +1,11 @@
+--- Tensile-rocm-5.4.2/Tensile/Source/lib/CMakeLists.txt.bak	2023-01-27 08:30:16.374451318 +0100
++++ Tensile-rocm-5.4.2/Tensile/Source/lib/CMakeLists.txt	2023-01-27 08:30:33.194515443 +0100
+@@ -103,7 +103,7 @@
+ endif()
+ 
+ if(TENSILE_USE_MSGPACK)
+-    find_package(msgpack REQUIRED)
++    find_package(msgpackc-cxx REQUIRED)
+     target_compile_definitions(TensileHost PUBLIC -DTENSILE_MSGPACK=1)
+ 
+     if(TARGET msgpackc-cxx)
diff --git a/rocblas/test.cpp b/rocblas/test.cpp
new file mode 100644
index 0000000000..5870f6075b
--- /dev/null
+++ b/rocblas/test.cpp
@@ -0,0 +1,70 @@
+#include <rocblas/rocblas.h>
+#include <hip/hip_runtime.h>
+#include <vector>
+#include <random>
+#include <algorithm>
+#include <cmath>
+#include <iostream>
+// Smoke test: run an n x n SGEMM on the device and compare it with a host-side reference.
+int main()
+{
+    size_t n = 128;
+    size_t size = n * n;
+
+    std::random_device rd;
+    std::mt19937 gen(rd());
+    std::uniform_real_distribution<float> dist(-1.0, 1.0);
+    auto myrand = [&](){return dist(gen);};
+    // Device buffers for the two inputs (x, y) and the result (z).
+    float *x;
+    float *y;
+    float *z;
+    hipMalloc((void**)&x, sizeof *x * size);
+    hipMalloc((void**)&y, sizeof *y * size);
+    hipMalloc((void**)&z, sizeof *z * size);
+    // Host-side inputs, filled with random values drawn from dist.
+    std::vector<float> xin(size);
+    std::vector<float> yin(size);
+
+    std::generate(xin.begin(), xin.end(), myrand);
+    std::generate(yin.begin(), yin.end(), myrand);
+
+    hipMemcpy(x, xin.data(), sizeof *x * size, hipMemcpyHostToDevice);
+    hipMemcpy(y, yin.data(), sizeof *y * size, hipMemcpyHostToDevice);
+
+    rocblas_handle handle;
+    rocblas_create_handle(&handle);
+    // z = alpha * x * y; beta = 0, so the uninitialised contents of z are not used.
+    float alpha = 15.412f;
+    float beta = 0.0f;
+    rocblas_sgemm(handle, rocblas_operation_none, rocblas_operation_none,
+        n, n, n, &alpha, x, n, y, n, &beta, z, n);
+
+    std::vector<float> zout(size);
+    hipMemcpy(zout.data(), z, sizeof *z * size, hipMemcpyDeviceToHost);
+    // Subtract the host-computed product (column-major indexing); the residual should be ~0.
+    for(size_t j = 0; j < n; j++){
+        for(size_t i = 0; i < n; i++){
+            for(size_t k = 0; k < n; k++){
+                zout[i + j * n] -= alpha * xin[i + k * n] * yin[k + j * n];
+            }
+        }
+    }
+    // Report the first element whose residual exceeds the tolerance and fail.
+    float tol = 0.001f;
+    for(size_t i = 0; i < size; i++){
+        if(std::abs(zout[i]) > tol){
+            std::cout << "Element mismatch at index " << i << "\n";
+            std::cout << "Expected: 0\n";
+            std::cout << "Actual : " << zout[i] << "\n";
+            return 1;
+        }
+    }
+
+    std::cout << "TESTS PASSED!" << std::endl;
+
+    hipFree(x);
+    hipFree(y);
+    hipFree(z);
+    rocblas_destroy_handle(handle);
+}
diff --git a/rocblas/test.sh b/rocblas/test.sh
new file mode 100755
index 0000000000..a0220f32e6
--- /dev/null
+++ b/rocblas/test.sh
@@ -0,0 +1,5 @@
+#! /usr/bin/env sh
+# Build test.cpp against rocBLAS in a scratch directory (removed on exit) and run it.
+OUT=$(mktemp -d) && trap 'rm -rf "$OUT"' EXIT
+/opt/rocm/bin/hipcc -o "$OUT"/test test.cpp -lrocblas
+"$OUT"/test