Skip to content

Instantly share code, notes, and snippets.

@bgeneto
Last active January 20, 2025 09:17
Show Gist options
  • Save bgeneto/155dbaf4e56fae9015b40c16c4268b7a to your computer and use it in GitHub Desktop.
Save bgeneto/155dbaf4e56fae9015b40c16c4268b7a to your computer and use it in GitHub Desktop.
OpenBLAS ZHEEV slow performance bug
! --------------------------------------------------------------------------------------
!
! 01. Purpose: Solves a simple eigenvalue/eigenvector problem in order to check
! for the zheev thread locking bug #1560
! https://github.com/xianyi/OpenBLAS/issues/1560
!
! 02. Compile and build
!
! export MAX_THREADS=4
! export OPENBLAS_NUM_THREADS=$MAX_THREADS
! export MKL_NUM_THREADS=$MAX_THREADS
! export OMP_NUM_THREADS=$MAX_THREADS
! export GOTO_NUM_THREADS=$MAX_THREADS
! export BLIS_NUM_THREADS=$MAX_THREADS
!
! gfortran + openblas:
! gfortran -O2 -m64 -mcmodel=medium -fmax-stack-var-size=2147483647 -pthread ./hermitianEigen.f90 -o ./hermitianEigen -lopenblas
!
! gfortran + mkl:
! gfortran -O2 -m64 -mcmodel=medium -fmax-stack-var-size=2147483647 ./hermitianEigen.f90 -o ./hermitianEigen-mkl -Wl,--no-as-needed -lmkl_gf_lp64 -lmkl_tbb_thread -lmkl_core -lpthread -lm -ldl
!
! Intel fortran + MKL:
! ifort -O2 -i8 -qmkl=parallel ./hermitianEigen.f90 -o ./hermitianEigen-ifort -lpthread -lm -ldl
!
! 03. Run
! # to generate a positive definite hermitian matrix:
! ./hermitianEigen -p
! # to generate a generic hermitian matrix:
! ./hermitianEigen
!
! Last Modified: 20220801
! Author: bgeneto
!
! --------------------------------------------------------------------------------------
program hermitianEigen
use, intrinsic :: iso_fortran_env
implicit none
integer, parameter :: rp = REAL64 ! default real precision
integer, parameter :: ip = INT64 ! default integer precision
integer(ip), parameter :: n = 4096_ip ! hermitian matrix dimension
integer(ip), parameter :: lda = n, ldz = n ! leading dimensions
integer :: seed(4) = [0, 0, 0, 1]
integer(ip) :: i, j, k, info, lwork, lrwork, liwork, &
& il, iu, m, start_time, end_time, rate
real(rp) :: abstol, vl, vu
! static/stack allocated arrays
real(rp) :: w(n), revals(n*n), imvals(n*n)
integer(ip) :: isuppz(n)
complex(rp) :: mat(lda,n), bmat(lda,n), z(ldz,n)
! dynamic allocated arrays
real(rp), allocatable, dimension(:) :: rwork
integer(ip), allocatable, dimension(:) :: iwork
complex(rp), allocatable, dimension(:) :: work
character(100) :: param
logical :: pdm = .false.
intrinsic :: random_seed, random_number
intrinsic :: int, min, max
external :: zheev, zheevr
call system_clock (count_rate=rate)
call system_clock (count=start_time)
! uses the default tolerance
abstol = -1.0_rp
! check command line parameter (in order to generate a positive definite matrix)
cmdarg: if (command_argument_count() > 0) then
call get_command_argument(1, param)
param = trim(adjustl(param))
if (param(1:2) .eq. '-p' .or. param(1:2) .eq. '/p') then
pdm = .true.
end if
end if cmdarg
! generate matrix
matrix: if (pdm) then
! generate random hermitian positive definite matrix
! in this case zheev runs faster than zheevr...
write(*,*) '01. Generating random numbers for hermitian matrix... '
call zlarnv(1, seed, lda*n, mat)
write(*,*) '02. Building our positive definite hermitian matrix...'
mat = mat * conjg(transpose(mat))
do i = 1, n
mat(i,i) = mat(i,i) + n
end do
else
write(*,*) '01. Generating random numbers for hermitian matrix... '
call random_seed()
call random_number(revals)
call random_seed()
call random_number(imvals)
write(*,*) '02. Building our generic hermitian matrix...'
! dummy way to create our hermitian matrix
k = 0_ip
do j = 1, n
do i = 1, n
k = k + 1_ip
mat(i,j) = cmplx(revals(k), imvals(k), kind=rp)
if (i == j) then
mat(i,j)%im = 0.0_rp
else if ( (mat(i,j) /= mat(j,i)) .and. (j > i) ) then
mat(i,j)%re = mat(j,i)%re
mat(i,j)%im = -mat(j,i)%im
end if
end do
end do
end if matrix
! boundary conditions
mat(1,n) = mat(n,1)
! backup matrix
bmat = mat
! ------------------ zheevr ------------------
write(*,*) '03. Computing using ZHEEVR...'
! temporary allocate arrays
allocate(iwork(1), rwork(1), work(1))
! query optimal workspace array dimensions
lwork = -1
lrwork = -1
liwork = -1
call zheevr('V', 'A', 'U', n, mat, lda, vl, vu, il, &
& iu, abstol, m, w, z, ldz, isuppz, work, lwork, rwork, &
& lrwork, iwork, liwork, info)
if ( info .gt. 0 ) then
write(*,*) 'Failed to find optimal work array dimensions.'
stop
end if
! work arrays dimensions
lwork = max(2*n, int(work(1)))
lrwork = max(24*n, int(rwork(1)))
liwork = min(10*n, iwork(1))
write(*,"(1x,a,3i9)") '04. Optimal workspace for ZHEEVR computed: ', liwork, lrwork, lwork
deallocate(iwork, rwork, work)
allocate(iwork(liwork), rwork(lrwork), work(lwork))
write(*,*) '05. Computing all eigenvalues and eigenvectors using ZHEEVR...'
! now we solve the eigenproblem
call zheevr( 'V', 'A', 'U', n, mat, lda, vl, vu, il, &
& iu, abstol, m, w, z, ldz, isuppz, work, lwork, rwork, &
& lrwork, iwork, liwork, info )
! check for convergence
if ( info .gt. 0 ) then
write(*,*) 'ERROR: Algorithm ZHEEVR failed to compute eigenvalues.'
stop
else
write(*,*) ' OK: all eigenvalues/vectors computed successfully by ZHEEVR!'
end if
call system_clock (count=end_time)
write(*,"(a,f8.3,a)") "ZHEEVR took: ", real(end_time - start_time)/real(rate), " seconds"
! ------------------ zheev ------------------
call system_clock (count_rate=rate)
call system_clock (count=start_time)
write(*,*) '06. Now using ZHEEV, so be patient (switch to single thread bug)...'
! temporary allocate arrays
deallocate(iwork, rwork, work)
allocate(iwork(1), rwork(1), work(1))
! query the optimal workspace
lwork = -1
call zheev( 'V', 'U', n, bmat, lda, w, work, lwork, rwork, info )
if ( info .gt. 0 ) then
write(*,*) 'ERROR: Failed to find optimal work array dimension.'
stop
end if
! work arrays dimension
lwork = max(2*n-1, int(work(1)))
lrwork = max(1, 3*n-2)
write(*,"(1x,a,3i9)") '07. Optimal workspace for ZHEEV computed: ', lrwork, lwork
! proper array allocation
deallocate(rwork, work)
allocate(rwork(lrwork), work(lwork))
write(*,*) '08. Computing all eigenvalues and eigenvectors using ZHEEV...'
! solve eigenproblem
call zheev( 'V', 'U', n, bmat, lda, w, work, lwork, rwork, info )
! check for convergence
if ( info .gt. 0 ) then
write(*,*) 'ERROR: Algorithm ZHEEV failed to compute eigenvalues.'
stop
else
write(*,*) ' OK: all eigenvalues/vectors computed successfully by ZHEEV!'
end if
call system_clock (count=end_time)
write(*,"(a,f8.3,a)") "ZHEEV took: ", real(end_time - start_time)/real(rate), " seconds"
end program hermitianEigen
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment