* update rocblas to 6.2.4-1

This commit is contained in:
Alexander Baldeck 2025-01-25 10:22:44 +01:00
parent 6e12f01a5c
commit e8dd0d323e
4 changed files with 113 additions and 33 deletions

View File

@@ -1,31 +1,35 @@
pkgbase = rocblas
pkgdesc = Next generation BLAS implementation for ROCm platform
pkgver = 6.0.2
pkgver = 6.2.4
pkgrel = 1
url = https://rocblas.readthedocs.io/en/latest
arch = x86_64
arch = powerpc64le
arch = powerpc64
arch = riscv64
license = MIT
makedepends = rocm-cmake
makedepends = python
makedepends = python-virtualenv
makedepends = python-pyaml
makedepends = python-wheel
makedepends = git
makedepends = python-msgpack
makedepends = python-joblib
makedepends = perl-file-which
makedepends = msgpack-cxx
makedepends = gcc-fortran
depends = rocm-core
depends = hip
depends = hip-runtime-amd
depends = glibc
depends = gcc-libs
depends = openmp
options = !lto
source = rocblas-6.0.2.tar.gz::https://github.com/ROCmSoftwarePlatform/rocBLAS/archive/rocm-6.0.2.tar.gz
source = rocblas-tensile-6.0.2.tar.gz::https://github.com/ROCmSoftwarePlatform/Tensile/archive/refs/tags/rocm-6.0.2.tar.gz
source = find-msgpack-5.patch
sha256sums = d1bf31063a2d349797b88c994c91d05f94e681bafb5550ad9b53529703d89dbb
sha256sums = 1d8a92422560c1e908fa25fd97a4aa07a96659528a543f77618408ffcfe1f307
sha256sums = 3f91bf087e4ea72eaef5acd500e16b61aa69c029cfcca14666799a7c42a0c5aa
source = rocblas-6.2.4.tar.gz::https://github.com/ROCmSoftwarePlatform/rocBLAS/archive/rocm-6.2.4.tar.gz
source = rocblas-tensile-6.2.4.tar.gz::https://github.com/ROCmSoftwarePlatform/Tensile/archive/refs/tags/rocm-6.2.4.tar.gz
source = remove-mf16c-flag-as-f16-intrinsics.patch
sha256sums = 8bacf74e3499c445f1bb0a8048df1ef3ce6f72388739b1823b5784fd1e8aa22a
sha256sums = dd0721e4371c8752aa4b14362f75d7ebb7805f57dcb990e03ae08cef4a291383
sha256sums = 5ed4d48d747c36c7a3739abd0ee791d92754cbabade09be0e80261d3431bb268
pkgname = rocblas

View File

@@ -1,39 +1,42 @@
# POWER Maintainer: Alexander Baldeck <alex.bldck@gmail.com>
# Maintainer: Torsten Keßler <tpkessler at archlinux dot org>
# Contributor: Markus Näther <naether.markus@gmail.com>
# Contributor: Lubosz Sarnecki <lubosz@gmail.com>
pkgname=rocblas
pkgver=6.0.2
pkgrel=1.1
pkgver=6.2.4
pkgrel=1
pkgdesc='Next generation BLAS implementation for ROCm platform'
arch=(x86_64 powerpc64le powerpc64 riscv64)
url='https://rocblas.readthedocs.io/en/latest'
license=('MIT')
depends=('rocm-core' 'hip' 'glibc' 'gcc-libs' 'openmp')
makedepends=('rocm-cmake' 'python' 'python-virtualenv' 'python-pyaml' 'python-wheel'
depends=('rocm-core' 'hip-runtime-amd' 'glibc' 'gcc-libs' 'openmp')
makedepends=('rocm-cmake' 'python' 'python-virtualenv' 'python-pyaml' 'python-wheel' 'git'
'python-msgpack' 'python-joblib' 'perl-file-which' 'msgpack-cxx' 'gcc-fortran')
_rocblas='https://github.com/ROCmSoftwarePlatform/rocBLAS'
_tensile='https://github.com/ROCmSoftwarePlatform/Tensile'
source=("$pkgname-$pkgver.tar.gz::$_rocblas/archive/rocm-$pkgver.tar.gz"
"$pkgname-tensile-$pkgver.tar.gz::$_tensile/archive/refs/tags/rocm-$pkgver.tar.gz"
"find-msgpack-5.patch")
sha256sums=('d1bf31063a2d349797b88c994c91d05f94e681bafb5550ad9b53529703d89dbb'
'1d8a92422560c1e908fa25fd97a4aa07a96659528a543f77618408ffcfe1f307'
'3f91bf087e4ea72eaef5acd500e16b61aa69c029cfcca14666799a7c42a0c5aa')
options=(!debug !lto)
"remove-mf16c-flag-as-f16-intrinsics.patch")
sha256sums=('8bacf74e3499c445f1bb0a8048df1ef3ce6f72388739b1823b5784fd1e8aa22a'
'dd0721e4371c8752aa4b14362f75d7ebb7805f57dcb990e03ae08cef4a291383'
'5ed4d48d747c36c7a3739abd0ee791d92754cbabade09be0e80261d3431bb268')
options=(!lto)
_dirname="$(basename "$_rocblas")-$(basename "${source[0]}" ".tar.gz")"
_tensile_dir="$(basename "$_tensile")-$(basename "${source[1]}" ".tar.gz")"
# Apply the packaging patches to the source trees before the build step.
# NOTE(review): this listing looks like a diff view with the +/- markers
# stripped, so the body below presumably shows the old and the new patch step
# side by side - confirm which lines are live before relying on it.
prepare() {
# Enter the Tensile source directory and apply the msgpack find_package patch
# located in $srcdir.
cd "$_tensile_dir"
patch -Np1 -i "$srcdir/find-msgpack-5.patch"
# Enter the rocBLAS source directory and apply the patch that drops the
# -mf16c compile flag; the patch file is referenced one directory up.
cd "$_dirname"
# https://github.com/ROCm/Tensile/issues/2044
patch -Np1 -i ../remove-mf16c-flag-as-f16-intrinsics.patch
}
build() {
# Compile source code for supported GPU archs in parallel
#export HIPCC_COMPILE_FLAGS_APPEND="-parallel-jobs=$(nproc)"
#export HIPCC_LINK_FLAGS_APPEND="-parallel-jobs=$(nproc)"
export HIPCC_COMPILE_FLAGS_APPEND="-parallel-jobs=4"
export HIPCC_LINK_FLAGS_APPEND="-parallel-jobs=4"
# -fcf-protection is not supported by HIP, see
# https://rocm.docs.amd.com/en/latest/reference/rocmcc.html#support-status-of-other-clang-options
# https://rocm.docs.amd.com/projects/llvm-project/en/latest/reference/rocmcc.html#support-status-of-other-clang-options
local cmake_args=(
-Wno-dev
-S "$_dirname"

View File

@@ -1,11 +0,0 @@
--- Tensile-rocm-5.4.2/Tensile/Source/lib/CMakeLists.txt.bak 2023-01-27 08:30:16.374451318 +0100
+++ Tensile-rocm-5.4.2/Tensile/Source/lib/CMakeLists.txt 2023-01-27 08:30:33.194515443 +0100
@@ -103,7 +103,7 @@
endif()
if(TENSILE_USE_MSGPACK)
- find_package(msgpack REQUIRED)
+ find_package(msgpackc-cxx REQUIRED)
target_compile_definitions(TensileHost PUBLIC -DTENSILE_MSGPACK=1)
if(TARGET msgpackc-cxx)

View File

@@ -0,0 +1,84 @@
From 53ed3dbd9c05805eb431de81a19712099a099db9 Mon Sep 17 00:00:00 2001
From: amcamd <andrew.chapman@gmail.com>
Date: Mon, 29 Apr 2024 15:06:12 -0500
Subject: [PATCH] remove mf16c flag as f16 intrinsics _cvtss_sh, _cvtsh_ss no
longer used
---
clients/benchmarks/CMakeLists.txt | 8 --------
clients/gtest/CMakeLists.txt | 4 ----
clients/samples/CMakeLists.txt | 2 --
library/CMakeLists.txt | 5 +----
4 files changed, 1 insertion(+), 18 deletions(-)
diff --git a/clients/benchmarks/CMakeLists.txt b/clients/benchmarks/CMakeLists.txt
index 0bfd30af..e953a974 100644
--- a/clients/benchmarks/CMakeLists.txt
+++ b/clients/benchmarks/CMakeLists.txt
@@ -97,14 +97,6 @@ if( BUILD_WITH_TENSILE )
target_link_libraries( rocblas-gemm-tune PRIVATE ${COMMON_LINK_LIBS} )
endif()
-if( CMAKE_COMPILER_IS_GNUCXX OR CMAKE_CXX_COMPILER_ID MATCHES "Clang")
- # GCC or hip-clang needs specific flags to turn on f16c intrinsics
- target_compile_options( rocblas-bench PRIVATE -mf16c )
- if( BUILD_WITH_TENSILE )
- target_compile_options( rocblas-gemm-tune PRIVATE -mf16c )
- endif()
-endif()
-
target_compile_options(rocblas-bench PRIVATE $<$<COMPILE_LANGUAGE:CXX>:${COMMON_CXX_OPTIONS}>)
if( BUILD_WITH_TENSILE )
target_compile_options(rocblas-gemm-tune PRIVATE $<$<COMPILE_LANGUAGE:CXX>:${COMMON_CXX_OPTIONS}>)
diff --git a/clients/gtest/CMakeLists.txt b/clients/gtest/CMakeLists.txt
index 8352276c..3a3f003c 100644
--- a/clients/gtest/CMakeLists.txt
+++ b/clients/gtest/CMakeLists.txt
@@ -165,10 +165,6 @@ else()
endif()
target_link_libraries( rocblas-test PRIVATE ${COMMON_LINK_LIBS} )
-if( CMAKE_COMPILER_IS_GNUCXX OR CMAKE_CXX_COMPILER_ID MATCHES "Clang")
- # GCC or hip-clang needs specific flag to turn on f16c intrinsics
- target_compile_options( rocblas-test PRIVATE -mf16c )
-endif( )
target_compile_options(rocblas-test PRIVATE $<$<COMPILE_LANGUAGE:CXX>:${COMMON_CXX_OPTIONS}>)
target_compile_definitions( rocblas-test PRIVATE ROCM_USE_FLOAT16 ROCBLAS_INTERNAL_API ROCBLAS_NO_DEPRECATED_WARNINGS )
diff --git a/clients/samples/CMakeLists.txt b/clients/samples/CMakeLists.txt
index 2f6caeaf..8d09dc97 100644
--- a/clients/samples/CMakeLists.txt
+++ b/clients/samples/CMakeLists.txt
@@ -115,8 +115,6 @@ foreach( exe ${sample_list_all} )
)
if( CMAKE_COMPILER_IS_GNUCXX OR CMAKE_CXX_COMPILER_ID MATCHES "Clang")
- # GCC or hip-clang needs specific flags to turn on f16c intrinsics
- target_compile_options( ${exe} PRIVATE -mf16c )
target_compile_definitions( ${exe} PRIVATE ROCBLAS_INTERNAL_API )
endif( )
diff --git a/library/CMakeLists.txt b/library/CMakeLists.txt
index 90a75dd3..9a5717ba 100755
--- a/library/CMakeLists.txt
+++ b/library/CMakeLists.txt
@@ -1,5 +1,5 @@
# ########################################################################
-# Copyright (C) 2016-2023 Advanced Micro Devices, Inc. All rights reserved.
+# Copyright (C) 2016-2024 Advanced Micro Devices, Inc. All rights reserved.
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
@@ -60,9 +60,6 @@ function( rocblas_library_settings lib_target_ )
$<INSTALL_INTERFACE:${CMAKE_INSTALL_INCLUDEDIR}>
)
- # GCC or hip-clang needs specific flags to turn on f16c intrinsics
- target_compile_options( ${lib_target_} PRIVATE -mf16c )
-
# Do not allow Variable Length Arrays (use unique_ptr instead)
target_compile_options( ${lib_target_} PRIVATE -Werror=vla )
--
2.47.0