* add rocblas

This commit is contained in:
Alexander Baldeck 2024-03-17 10:09:20 +01:00
parent 43b679fa3f
commit ade2876ce4
4 changed files with 146 additions and 0 deletions

60
rocblas/PKGBUILD Normal file
View File

@ -0,0 +1,60 @@
# POWER Maintainer: Alexander Baldeck <alex.bldck@gmail.com>
# Maintainer: Torsten Keßler <tpkessler at archlinux dot org>
# Contributor: Markus Näther <naether.markus@gmail.com>
# Package identity. pkgver also selects the upstream "rocm-<pkgver>" archives
# referenced in source=() below.
pkgname=rocblas
pkgver=6.0.2
pkgrel=1
pkgdesc='Next generation BLAS implementation for ROCm platform'
arch=(x86_64 powerpc64le powerpc64 riscv64)
url='https://rocblas.readthedocs.io/en/latest'
license=('MIT')
# Packages required at run time.
depends=('rocm-core' 'hip' 'glibc' 'gcc-libs' 'openmp')
# Packages required only while building.
makedepends=('rocm-cmake' 'python' 'python-virtualenv' 'python-pyaml' 'python-wheel'
'python-msgpack' 'python-joblib' 'perl-file-which' 'msgpack-cxx' 'gcc-fortran')
# Upstream repositories; their basenames are reused below to derive the
# directory names of the sources.
_rocblas='https://github.com/ROCmSoftwarePlatform/rocBLAS'
_tensile='https://github.com/ROCmSoftwarePlatform/Tensile'
# rocBLAS and Tensile archives for the rocm-$pkgver tag (saved under a local
# name via the "name::url" form), plus a local patch applied in prepare().
source=("$pkgname-$pkgver.tar.gz::$_rocblas/archive/rocm-$pkgver.tar.gz"
"$pkgname-tensile-$pkgver.tar.gz::$_tensile/archive/refs/tags/rocm-$pkgver.tar.gz"
"find-msgpack-5.patch")
# One checksum per source=() entry, in the same order.
sha256sums=('d1bf31063a2d349797b88c994c91d05f94e681bafb5550ad9b53529703d89dbb'
'1d8a92422560c1e908fa25fd97a4aa07a96659528a543f77618408ffcfe1f307'
'3f91bf087e4ea72eaef5acd500e16b61aa69c029cfcca14666799a7c42a0c5aa')
# makepkg build option: do not apply link-time optimization.
options=(!lto)
# Repository basename joined with the archive basename minus ".tar.gz", i.e.
# "rocBLAS-rocm-$pkgver". Used as the CMake source directory in build() and to
# locate LICENSE.md in package(). Presumably this matches the directory the
# archive unpacks to -- verify when the upstream archive layout changes.
_dirname="$(basename "$_rocblas")-$(basename "${source[0]}" ".tar.gz")"
# Same construction for Tensile: "Tensile-rocm-$pkgver". Patched in prepare()
# and handed to CMake as Tensile_TEST_LOCAL_PATH in build().
_tensile_dir="$(basename "$_tensile")-$(basename "${source[1]}" ".tar.gz")"
# Apply the msgpack find_package fix to the bundled Tensile sources.
prepare() {
  local msgpack_fix="$srcdir/find-msgpack-5.patch"

  cd "$_tensile_dir"
  # --forward/--strip=1/--input are the long spellings of -N, -p1 and -i.
  patch --forward --strip=1 --input="$msgpack_fix"
}
# Configure rocBLAS (with the local Tensile checkout) into ./build and compile it.
build() {
  # Compile source code for supported GPU archs in parallel: hand hipcc one
  # job per available CPU for both the compile and the link step.
  local hip_jobs
  hip_jobs="$(nproc)"
  export HIPCC_COMPILE_FLAGS_APPEND="-parallel-jobs=${hip_jobs}"
  export HIPCC_LINK_FLAGS_APPEND="-parallel-jobs=${hip_jobs}"

  local configure_opts=(
    -Wno-dev
    -S "$_dirname"
    -B build

    # Toolchain and flags.
    -D CMAKE_BUILD_TYPE=None
    -D CMAKE_CXX_COMPILER=/opt/rocm/bin/hipcc
    -D CMAKE_TOOLCHAIN_FILE=toolchain-linux.cmake
    # -fcf-protection is not supported by HIP, see
    # https://rocm.docs.amd.com/en/latest/reference/rocmcc.html#support-status-of-other-clang-options
    -D CMAKE_CXX_FLAGS="${CXXFLAGS} -fcf-protection=none"

    # Install location and where to find the ROCm CMake packages.
    -D CMAKE_INSTALL_PREFIX=/opt/rocm
    -D CMAKE_PREFIX_PATH=/opt/rocm/llvm/lib/cmake/llvm
    -D amd_comgr_DIR=/opt/rocm/lib/cmake/amd_comgr

    # Tensile: use the patched checkout from prepare() and msgpack libraries.
    -D BUILD_WITH_TENSILE=ON
    -D Tensile_LIBRARY_FORMAT=msgpack
    -D Tensile_TEST_LOCAL_PATH="$srcdir/$_tensile_dir"
  )

  cmake "${configure_opts[@]}"
  cmake --build build
}
# Install the build tree into $pkgdir and ship the upstream license text.
package() {
  local license_dest="$pkgdir/usr/share/licenses/$pkgname/LICENSE"

  DESTDIR="$pkgdir" cmake --install build
  install -D -m 644 "$_dirname/LICENSE.md" "$license_dest"
}

View File

@ -0,0 +1,11 @@
--- Tensile-rocm-5.4.2/Tensile/Source/lib/CMakeLists.txt.bak 2023-01-27 08:30:16.374451318 +0100
+++ Tensile-rocm-5.4.2/Tensile/Source/lib/CMakeLists.txt 2023-01-27 08:30:33.194515443 +0100
@@ -103,7 +103,7 @@
endif()
if(TENSILE_USE_MSGPACK)
- find_package(msgpack REQUIRED)
+ find_package(msgpackc-cxx REQUIRED)
target_compile_definitions(TensileHost PUBLIC -DTENSILE_MSGPACK=1)
if(TARGET msgpackc-cxx)

70
rocblas/test.cpp Normal file
View File

@ -0,0 +1,70 @@
#include <rocblas/rocblas.h>
#include <hip/hip_runtime.h>
#include <vector>
#include <random>
#include <algorithm>
#include <cmath>
#include <iostream>
int main()
{
size_t n = 128;
size_t size = n * n;
std::random_device rd;
std::mt19937 gen(rd());
std::uniform_real_distribution<float> dist(-1.0, 1.0);
auto myrand = [&](){return dist(gen);};
float *x;
float *y;
float *z;
hipMalloc((void**)&x, sizeof *x * size);
hipMalloc((void**)&y, sizeof *y * size);
hipMalloc((void**)&z, sizeof *z * size);
std::vector<float> xin(size);
std::vector<float> yin(size);
std::generate(xin.begin(), xin.end(), myrand);
std::generate(yin.begin(), yin.end(), myrand);
hipMemcpy(x, xin.data(), sizeof *x * size, hipMemcpyHostToDevice);
hipMemcpy(y, yin.data(), sizeof *x * size, hipMemcpyHostToDevice);
rocblas_handle handle;
rocblas_create_handle(&handle);
float alpha = 15.412f;
float beta = 0.0f;
rocblas_sgemm(handle, rocblas_operation_none, rocblas_operation_none,
n, n, n, &alpha, x, n, y, n, &beta, z, n);
std::vector<float> zout(size);
hipMemcpy(zout.data(), z, sizeof *z * size, hipMemcpyDeviceToHost);
for(size_t j = 0; j < n; j++){
for(size_t i = 0; i < n; i++){
for(size_t k = 0; k < n; k++){
zout[i + j * n] -= alpha * xin[i + k * n] * yin[k + j * n];
}
}
}
float tol = 0.001f;
for(size_t i = 0; i < size; i++){
if(std::abs(zout[i]) > tol){
std::cout << "Element mismatch at index " << i << "\n";
std::cout << "Expected: 0\n";
std::cout << "Actual : " << zout[i] << "\n";
return 1;
}
}
std::cout << "TESTS PASSED!" << std::endl;
hipFree(x);
hipFree(y);
hipFree(z);
rocblas_destroy_handle(handle);
}

5
rocblas/test.sh Executable file
View File

@ -0,0 +1,5 @@
#! /usr/bin/env sh
# Build test.cpp (from the current directory) against the installed rocBLAS
# with hipcc and run the resulting binary. The exit status is non-zero when
# the scratch directory cannot be created, compilation fails, or the test fails.
set -e
OUT=$(mktemp -d)
# Remove the scratch directory on every exit path; the script's exit status
# is unaffected by the trap.
trap 'rm -rf "$OUT"' EXIT
/opt/rocm/bin/hipcc -o "$OUT"/test test.cpp -lrocblas
"$OUT"/test