diff --git a/Golden_Repo/e/ELPA/ELPA-2021.11.001-gomkl-2021b.eb b/Golden_Repo/e/ELPA/ELPA-2021.11.001-gomkl-2021b.eb
new file mode 100644
index 0000000000000000000000000000000000000000..cd313db7f33e93dd93834a820b48cc2727306f6c
--- /dev/null
+++ b/Golden_Repo/e/ELPA/ELPA-2021.11.001-gomkl-2021b.eb
@@ -0,0 +1,107 @@
+name = 'ELPA'
+version = '2021.11.001'
+
+homepage = 'https://elpa.rzg.mpg.de'
+description = """Eigenvalue SoLvers for Petaflop-Applications. ELPA has been installed as a module in
+$EBROOTELPA ($ELPA_ROOT is also defined). This installation
+contains the pure MPI version and the hybrid MPI/OpenMP version.
+Notice: If you want to use OpenMP threads, you have to set
+export ELPA_DEFAULT_omp=<number of threads per MPI process>
+in your batch job and start MPI with MPI_INIT_THREAD(MPI_THREAD_MULTIPLE,....
+
+Several assembly kernels have been compiled. They can be chosen at runtime when calling the library or
+with the environment variables REAL_ELPA_KERNEL or COMPLEX_ELPA_KERNEL.
+
+An example is
+export REAL_ELPA_KERNEL=REAL_ELPA_KERNEL_GENERIC
+which chooses the generic real kernel for elpa2.
+Starting with version 2019.11.001 the legacy interface is no longer available.
+"""
+
+usage = """You can get an overview of the available kernels by loading ELPA and then submitting a batch job with
+
+srun --ntasks=1 $EBROOTELPA/bin/elpa2_print_kernels
+
+Programs using this ELPA library have to be compiled with
+
+-I$ELPA_INCLUDE[_OPENMP]/ -I$ELPA_INCLUDE[_OPENMP]/elpa -I$ELPA_MODULES[_OPENMP]
+
+and linked with
+
+-L$EBROOTELPA/lib -lelpa[_openmp]
+-lmkl_scalapack_lp64
+${MKLROOT}/lib/intel64/libmkl_blacs_openmpi_lp64.a
+-lmkl_gf_lp64 -lmkl_sequential (or -lmkl_gnu_thread for the OpenMP version)
+-lmkl_core -lgomp -lpthread -lm -ldl -lstdc++
+"""
+
+examples = 'Examples can be found in $EBROOTELPA/examples'
+
+toolchain = {'name': 'gomkl', 'version': '2021b'}
+toolchainopts = {'openmp': True, 'usempi': True}
+
+source_urls = ['https://gitlab.mpcdf.mpg.de/elpa/elpa/-/archive/new_release_%(version)s/']
+sources = ["elpa-new_release_%(version)s.tar.gz"]
+patches = [
+    '%(name)s-%(version)s_fix_hardcoded_perl_path.patch',
+    'ELPA-%(version)s_install-libelpatest.patch',
+]
+checksums = [
+    'e61048393a5e5f460858a11b216547fa3f434dd620c478cb20a52ebf543260f1',  # elpa-new_release_2021.11.001.tar.gz
+    # ELPA-2021.11.001_fix_hardcoded_perl_path.patch
+    '5fc40b6f3f948fd026efc688f9bafba0461d68ad007d9dc161bfd1507e2fc13b',
+    '2ce155ccbcdd61e8036d859aa204b48883695eff5f4decee3e5c2677516d8272',  # ELPA-2021.11.001_install-libelpatest.patch
+]
+
+builddependencies = [
+    ('Autotools', '20210726'),
+    # remove_xcompiler script requires the 'python' command
+    ('Python', '3.9.6'),
+    ('Perl', '5.34.0'),
+]
+
+preconfigopts = './autogen.sh && '
+preconfigopts += 'export LDFLAGS="-lm $LDFLAGS" && '
+preconfigopts += 'autoreconf && '
+
+# The configure check for MPI_THREAD_MULTIPLE does not work: it builds an MPI
+# program but then executes it as plain ./conftest, without an MPI launcher.
+# Unfortunately, the corresponding check at runtime cannot be turned off either.
+configopts = '--without-threading-support-check-during-build '
+
+with_single = False
+
+# When building in parallel, the file test_setup_mpi.mod is sometimes
+# used before it is built, leading to an error. This must be a bug in
+# the makefile affecting parallel builds.
+maxparallel = 1
+
+
+postinstallcmds = [
+    'cp -r %(builddir)s/elpa-new_release_%(version)s/examples %(installdir)s/examples/',
+    'rm %(installdir)s/examples/*.orig',
+    'rm %(installdir)s/examples/*_cuda',
+    'rm %(installdir)s/examples/C/*.orig',
+    'rm %(installdir)s/examples/C/*_cuda',
+    'rm %(installdir)s/examples/Fortran/*.orig',
+    'rm %(installdir)s/examples/Fortran/*_cuda',
+    'cp %(builddir)s/elpa-new_release_%(version)s/test/shared/generated.h %(installdir)s/examples/C/generated.h',
+    'cp config.h config-f90.h %(installdir)s/include/elpa_openmp-%(version)s/elpa/',
+    'grep -v WITH_OPENMP config.h > %(installdir)s/include/elpa-%(version)s/elpa/config.h',
+    'grep -v WITH_OPENMP config-f90.h > %(installdir)s/include/elpa-%(version)s/elpa/config-f90.h',
+    'cp %(builddir)s/elpa-new_release_%(version)s/private_modules/* %(installdir)s/include/elpa-%(version)s/modules',
+    'cp %(builddir)s/elpa-new_release_%(version)s/test_modules/* %(installdir)s/include/elpa-%(version)s/modules',
+]
+
+modextravars = {
+    'ELPA_ROOT': '%(installdir)s',
+    'ELPAROOT': '%(installdir)s',
+    'ELPA_INCLUDE': '%(installdir)s/include/elpa-%(version)s/',
+    'ELPA_INCLUDE_OPENMP': '%(installdir)s/include/elpa_openmp-%(version)s/',
+    'ELPA_LIB': '%(installdir)s/lib',
+    'ELPA_LIB_OPENMP': '%(installdir)s/lib',
+    'ELPA_MODULES': '%(installdir)s/include/elpa-%(version)s/modules',
+    'ELPA_MODULES_OPENMP': '%(installdir)s/include/elpa_openmp-%(version)s/modules',
+}
+
+moduleclass = 'math'
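
For reference, a minimal single-rank C example matching the usage text above. This is a sketch, not repository content: it condenses the elpa_* call sequence of the test programs added by ELPA-2021.11.001_install-libelpatest.patch further down, and the 4x4 matrix, the block size, and the file name minimal_elpa.c are illustrative only.

/* Build against the pure MPI version (gomkl), following the usage text:
 *   mpicc minimal_elpa.c -I$ELPA_INCLUDE -I$ELPA_INCLUDE/elpa \
 *     -L$EBROOTELPA/lib -lelpa -lmkl_scalapack_lp64 \
 *     $MKLROOT/lib/intel64/libmkl_blacs_openmpi_lp64.a \
 *     -lmkl_gf_lp64 -lmkl_sequential -lmkl_core -lgomp -lpthread -lm -ldl -lstdc++
 */
#include <stdio.h>
#include <mpi.h>
#include <elpa/elpa.h>

int main(int argc, char **argv) {
    int err;
    MPI_Init(&argc, &argv);
    if (elpa_init(20211125) != ELPA_OK) {    /* CURRENT_API_VERSION used by the tests */
        fprintf(stderr, "ELPA API version not supported\n");
        return 1;
    }
    elpa_t h = elpa_allocate(&err);
    /* one MPI rank, so the local matrix is the full 4x4 symmetric matrix */
    double a[16] = {2, 1, 0, 0,  1, 2, 1, 0,  0, 1, 2, 1,  0, 0, 1, 2};
    double z[16], ev[4];
    elpa_set(h, "na", 4, &err);              /* matrix order */
    elpa_set(h, "nev", 4, &err);             /* number of eigenpairs to compute */
    elpa_set(h, "local_nrows", 4, &err);
    elpa_set(h, "local_ncols", 4, &err);
    elpa_set(h, "nblk", 2, &err);            /* ScaLAPACK block size */
    elpa_set(h, "mpi_comm_parent", (int) MPI_Comm_c2f(MPI_COMM_WORLD), &err);
    elpa_set(h, "process_row", 0, &err);
    elpa_set(h, "process_col", 0, &err);
    if (elpa_setup(h) != ELPA_OK) return 1;
    elpa_set(h, "solver", ELPA_SOLVER_1STAGE, &err);
    elpa_eigenvectors(h, a, ev, z, &err);    /* eigenvalues in ev, eigenvectors in z */
    for (int i = 0; i < 4; i++) printf("ev[%d] = %g\n", i, ev[i]);
    elpa_deallocate(h, &err);
    elpa_uninit(&err);
    MPI_Finalize();
    return 0;
}
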
diff --git a/Golden_Repo/e/ELPA/ELPA-2021.11.001-gpsmkl-2021b.eb b/Golden_Repo/e/ELPA/ELPA-2021.11.001-gpsmkl-2021b.eb
new file mode 100644
index 0000000000000000000000000000000000000000..eeeef09bff2024c840a5b531087aebdc8b6181ae
--- /dev/null
+++ b/Golden_Repo/e/ELPA/ELPA-2021.11.001-gpsmkl-2021b.eb
@@ -0,0 +1,105 @@
+name = 'ELPA'
+version = '2021.11.001'
+
+homepage = 'https://elpa.rzg.mpg.de'
+description = """Eigenvalue SoLvers for Petaflop-Applications. ELPA has been installed as a module in
+$EBROOTELPA ($ELPA_ROOT is also defined). This installation
+contains the pure MPI version and the hybrid MPI/OpenMP version.
+Notice: If you want to use OpenMP threads, you have to set
+export ELPA_DEFAULT_omp=<number of threads per MPI process>
+in your batch job and start MPI with MPI_INIT_THREAD(MPI_THREAD_MULTIPLE,....
+
+Several assembly kernels have been compiled. They can be chosen at runtime when calling the library or
+with the environment variables REAL_ELPA_KERNEL or COMPLEX_ELPA_KERNEL.
+
+An example is
+export REAL_ELPA_KERNEL=REAL_ELPA_KERNEL_GENERIC
+which chooses the generic real kernel for elpa2.
+Starting with version 2019.11.001 the legacy interface is no longer available.
+""" + +usage = """You can get an overview over the available kernels by loading ELPA and then submitting a batch job with + +srun --ntasks=1 $EBROOTELPA/bin/elpa2_print_kernels + +Programs using this ELPA library have to be compiled with + +-I$ELPA_INCLUDE[_OPENMP]/ -I$ELPA_INCLUDE[_OPENMP]/elpa -I$ELPA_MODULES[_OPENMP] + +and linked with + +-L$EBROOTELPA/lib -lelpa[_openmp] +-lmkl_scalapack_lp64 -lmkl_blacs_intelmpi_lp64 -lmkl_gf_lp64 +-lmkl_sequential[-lmkl_gnu_thread] +-lmkl_core -lgomp -lpthread -lm -ldl -lstdc++ +""" + +examples = 'Examples can be found in $EBROOTELPA/examples' + +toolchain = {'name': 'gpsmkl', 'version': '2021b'} +toolchainopts = {'openmp': True, 'usempi': True} + +source_urls = ['https://gitlab.mpcdf.mpg.de/elpa/elpa/-/archive/new_release_%(version)s/'] +sources = ["elpa-new_release_%(version)s.tar.gz"] +patches = [ + '%(name)s-%(version)s_fix_hardcoded_perl_path.patch', + 'ELPA-%(version)s_install-libelpatest.patch', +] +checksums = [ + 'e61048393a5e5f460858a11b216547fa3f434dd620c478cb20a52ebf543260f1', # elpa-new_release_2021.11.001.tar.gz + # ELPA-2021.11.001_fix_hardcoded_perl_path.patch + '5fc40b6f3f948fd026efc688f9bafba0461d68ad007d9dc161bfd1507e2fc13b', + '2ce155ccbcdd61e8036d859aa204b48883695eff5f4decee3e5c2677516d8272', # ELPA-2021.11.001_install-libelpatest.patch +] + +builddependencies = [ + ('Autotools', '20210726'), + # remove_xcompiler script requires 'python' command, + ('Python', '3.9.6'), + ('Perl', '5.34.0'), +] + +preconfigopts = './autogen.sh && ' +preconfigopts += 'export LDFLAGS="-lm $LDFLAGS" && ' +preconfigopts += 'autoreconf && ' + +# The checking of MPI_THREAD_MULTIPLE does not work because the check uses an +# MPI program that is then executed by just ./conftest +# Unfortunately you cannot turn of checking during runtime, too +configopts = '--without-threading-support-check-during-build ' + +with_single = False + +# When building in parallel, the file test_setup_mpi.mod is sometimes +# used before it is built, leading to an error. This must be a bug in +# the makefile affecting parallel builds. 
diff --git a/Golden_Repo/e/ELPA/ELPA-2021.11.001-intel-2021b.eb b/Golden_Repo/e/ELPA/ELPA-2021.11.001-intel-2021b.eb
new file mode 100644
index 0000000000000000000000000000000000000000..2f80e05e2ba06c1ed2fb01d8dffdb1af26859701
--- /dev/null
+++ b/Golden_Repo/e/ELPA/ELPA-2021.11.001-intel-2021b.eb
@@ -0,0 +1,104 @@
+name = 'ELPA'
+version = '2021.11.001'
+
+homepage = 'https://elpa.rzg.mpg.de'
+description = """Eigenvalue SoLvers for Petaflop-Applications. ELPA has been installed as a module in
+$EBROOTELPA ($ELPA_ROOT is also defined). This installation
+contains the pure MPI version and the hybrid MPI/OpenMP version.
+Notice: If you want to use OpenMP threads, you have to set
+export ELPA_DEFAULT_omp=<number of threads per MPI process>
+in your batch job and start MPI with MPI_INIT_THREAD(MPI_THREAD_MULTIPLE,....
+
+Several assembly kernels have been compiled. They can be chosen at runtime when calling the library or
+with the environment variables REAL_ELPA_KERNEL or COMPLEX_ELPA_KERNEL.
+
+An example is
+export REAL_ELPA_KERNEL=REAL_ELPA_KERNEL_GENERIC
+which chooses the generic real kernel for elpa2.
+Starting with version 2019.11.001 the legacy interface is no longer available.
+""" + +usage = """You can get an overview over the available kernels by loading ELPA and then submitting a batch job with + +srun --ntasks=1 $EBROOTELPA/bin/elpa2_print_kernels + +Programs using this ELPA library have to be compiled with + +-I$ELPA_INCLUDE[_OPENMP]/ -I$ELPA_INCLUDE[_OPENMP]/elpa -I$ELPA_MODULES[_OPENMP] + +and linked with + +-L$EBROOTELPA/lib -lelpa[_openmp] +-lmkl_scalapack_lp64 -lmkl_blacs_intelmpi_lp64 -lmkl_intel_lp64 +-lmkl_sequential[-lmkl_intel_thread] +-lmkl_core -liomp5 -lpthread -lstdc++ +""" + +examples = 'Examples can be found in $EBROOTELPA/examples' + +toolchain = {'name': 'intel', 'version': '2021b'} +toolchainopts = {'openmp': True, 'usempi': True} + +source_urls = ['https://gitlab.mpcdf.mpg.de/elpa/elpa/-/archive/new_release_%(version)s/'] +sources = ["elpa-new_release_%(version)s.tar.gz"] +patches = [ + '%(name)s-%(version)s_fix_hardcoded_perl_path.patch', + 'ELPA-%(version)s_install-libelpatest.patch', +] +checksums = [ + 'e61048393a5e5f460858a11b216547fa3f434dd620c478cb20a52ebf543260f1', # elpa-new_release_2021.11.001.tar.gz + # ELPA-2021.11.001_fix_hardcoded_perl_path.patch + '5fc40b6f3f948fd026efc688f9bafba0461d68ad007d9dc161bfd1507e2fc13b', + '2ce155ccbcdd61e8036d859aa204b48883695eff5f4decee3e5c2677516d8272', # ELPA-2021.11.001_install-libelpatest.patch +] + +builddependencies = [ + ('Autotools', '20210726'), + # remove_xcompiler script requires 'python' command, + ('Python', '3.9.6'), + ('Perl', '5.34.0'), +] + +preconfigopts = './autogen.sh && ' +preconfigopts += 'autoreconf && ' + +# The checking of MPI_THREAD_MULTIPLE does not work because the check uses an +# MPI program that is then executed by just ./conftest +# Unfortunately you cannot turn of checking during runtime, too +configopts = '--without-threading-support-check-during-build ' + +with_single = False + +# When building in parallel, the file test_setup_mpi.mod is sometimes +# used before it is built, leading to an error. This must be a bug in +# the makefile affecting parallel builds. 
diff --git a/Golden_Repo/e/ELPA/ELPA-2021.11.001-intel-para-2021b.eb b/Golden_Repo/e/ELPA/ELPA-2021.11.001-intel-para-2021b.eb
new file mode 100644
index 0000000000000000000000000000000000000000..8eda37582dd7d40372a09d88561616236b566c80
--- /dev/null
+++ b/Golden_Repo/e/ELPA/ELPA-2021.11.001-intel-para-2021b.eb
@@ -0,0 +1,104 @@
+name = 'ELPA'
+version = '2021.11.001'
+
+homepage = 'https://elpa.rzg.mpg.de'
+description = """Eigenvalue SoLvers for Petaflop-Applications. ELPA has been installed as a module in
+$EBROOTELPA ($ELPA_ROOT is also defined). This installation
+contains the pure MPI version and the hybrid MPI/OpenMP version.
+Notice: If you want to use OpenMP threads, you have to set
+export ELPA_DEFAULT_omp=<number of threads per MPI process>
+in your batch job and start MPI with MPI_INIT_THREAD(MPI_THREAD_MULTIPLE,....
+
+Several assembly kernels have been compiled. They can be chosen at runtime when calling the library or
+with the environment variables REAL_ELPA_KERNEL or COMPLEX_ELPA_KERNEL.
+
+An example is
+export REAL_ELPA_KERNEL=REAL_ELPA_KERNEL_GENERIC
+which chooses the generic real kernel for elpa2.
+Starting with version 2019.11.001 the legacy interface is no longer available.
+""" + +usage = """You can get an overview over the available kernels by loading ELPA and then submitting a batch job with + +srun --ntasks=1 $EBROOTELPA/bin/elpa2_print_kernels + +Programs using this ELPA library have to be compiled with + +-I$ELPA_INCLUDE[_OPENMP]/ -I$ELPA_INCLUDE[_OPENMP]/elpa -I$ELPA_MODULES[_OPENMP] + +and linked with + +-L$EBROOTELPA/lib -lelpa[_openmp] +-lmkl_scalapack_lp64 -lmkl_blacs_intelmpi_lp64 -lmkl_intel_lp64 +-lmkl_sequential[-lmkl_intel_thread] +-lmkl_core -liomp5 -lpthread -lstdc++ +""" + +examples = 'Examples can be found in $EBROOTELPA/examples' + +toolchain = {'name': 'intel-para', 'version': '2021b'} +toolchainopts = {'openmp': True, 'usempi': True} + +source_urls = ['https://gitlab.mpcdf.mpg.de/elpa/elpa/-/archive/new_release_%(version)s/'] +sources = ["elpa-new_release_%(version)s.tar.gz"] +patches = [ + '%(name)s-%(version)s_fix_hardcoded_perl_path.patch', + 'ELPA-%(version)s_install-libelpatest.patch', +] +checksums = [ + 'e61048393a5e5f460858a11b216547fa3f434dd620c478cb20a52ebf543260f1', # elpa-new_release_2021.11.001.tar.gz + # ELPA-2021.11.001_fix_hardcoded_perl_path.patch + '5fc40b6f3f948fd026efc688f9bafba0461d68ad007d9dc161bfd1507e2fc13b', + '2ce155ccbcdd61e8036d859aa204b48883695eff5f4decee3e5c2677516d8272', # ELPA-2021.11.001_install-libelpatest.patch +] + +builddependencies = [ + ('Autotools', '20210726'), + # remove_xcompiler script requires 'python' command, + ('Python', '3.9.6'), + ('Perl', '5.34.0'), +] + +preconfigopts = './autogen.sh && ' +preconfigopts += 'autoreconf && ' + +# The checking of MPI_THREAD_MULTIPLE does not work because the check uses an +# MPI program that is then executed by just ./conftest +# Unfortunately you cannot turn of checking during runtime, too +configopts = '--without-threading-support-check-during-build ' + +with_single = False + +# When building in parallel, the file test_setup_mpi.mod is sometimes +# used before it is built, leading to an error. This must be a bug in +# the makefile affecting parallel builds. 
diff --git a/Golden_Repo/e/ELPA/ELPA-2021.11.001-iomkl-2021b.eb b/Golden_Repo/e/ELPA/ELPA-2021.11.001-iomkl-2021b.eb
new file mode 100644
index 0000000000000000000000000000000000000000..08e7f242458aa3a1f8959cd1a89621a97e85a3cf
--- /dev/null
+++ b/Golden_Repo/e/ELPA/ELPA-2021.11.001-iomkl-2021b.eb
@@ -0,0 +1,105 @@
+name = 'ELPA'
+version = '2021.11.001'
+
+homepage = 'https://elpa.rzg.mpg.de'
+description = """Eigenvalue SoLvers for Petaflop-Applications. ELPA has been installed as a module in
+$EBROOTELPA ($ELPA_ROOT is also defined). This installation
+contains the pure MPI version and the hybrid MPI/OpenMP version.
+Notice: If you want to use OpenMP threads, you have to set
+export ELPA_DEFAULT_omp=<number of threads per MPI process>
+in your batch job and start MPI with MPI_INIT_THREAD(MPI_THREAD_MULTIPLE,....
+
+Several assembly kernels have been compiled. They can be chosen at runtime when calling the library or
+with the environment variables REAL_ELPA_KERNEL or COMPLEX_ELPA_KERNEL.
+
+An example is
+export REAL_ELPA_KERNEL=REAL_ELPA_KERNEL_GENERIC
+which chooses the generic real kernel for elpa2.
+Starting with version 2019.11.001 the legacy interface is no longer available.
+""" + +usage = """You can get an overview over the available kernels by loading ELPA and then submitting a batch job with + +srun --ntasks=1 $EBROOTELPA/bin/elpa2_print_kernels + +Programs using this ELPA library have to be compiled with + +-I$ELPA_INCLUDE[_OPENMP]/ -I$ELPA_INCLUDE[_OPENMP]/elpa -I$ELPA_MODULES[_OPENMP] + +and linked with + +-L$EBROOTELPA/lib -lelpa[_openmp] +-lmkl_scalapack_lp64 +${MKLROOT}/lib/intel64/libmkl_blacs_openmpi_lp64.a +-lmkl_intel_lp64 -lmkl_sequential[-lmkl_intel_thread] +-lmkl_core -liomp -lpthread -ldl -lstdc++ +""" + +examples = 'Examples can be found in $EBROOTELPA/examples' + +toolchain = {'name': 'iomkl', 'version': '2021b'} +toolchainopts = {'openmp': True, 'usempi': True} + +source_urls = ['https://gitlab.mpcdf.mpg.de/elpa/elpa/-/archive/new_release_%(version)s/'] +sources = ["elpa-new_release_%(version)s.tar.gz"] +patches = [ + '%(name)s-%(version)s_fix_hardcoded_perl_path.patch', + 'ELPA-%(version)s_install-libelpatest.patch', +] +checksums = [ + 'e61048393a5e5f460858a11b216547fa3f434dd620c478cb20a52ebf543260f1', # elpa-new_release_2021.11.001.tar.gz + # ELPA-2021.11.001_fix_hardcoded_perl_path.patch + '5fc40b6f3f948fd026efc688f9bafba0461d68ad007d9dc161bfd1507e2fc13b', + '2ce155ccbcdd61e8036d859aa204b48883695eff5f4decee3e5c2677516d8272', # ELPA-2021.11.001_install-libelpatest.patch +] + +builddependencies = [ + ('Autotools', '20210726'), + # remove_xcompiler script requires 'python' command, + ('Python', '3.9.6'), + ('Perl', '5.34.0'), +] + +preconfigopts = './autogen.sh && ' +preconfigopts += 'autoreconf && ' + +# The checking of MPI_THREAD_MULTIPLE does not work because the check uses an +# MPI program that is then executed by just ./conftest +# Unfortunately you cannot turn of checking during runtime, too +configopts = '--without-threading-support-check-during-build ' + +with_single = False + +# When building in parallel, the file test_setup_mpi.mod is sometimes +# used before it is built, leading to an error. This must be a bug in +# the makefile affecting parallel builds. 
diff --git a/Golden_Repo/e/ELPA/ELPA-2021.11.001_fix_hardcoded_perl_path.patch b/Golden_Repo/e/ELPA/ELPA-2021.11.001_fix_hardcoded_perl_path.patch
new file mode 100644
index 0000000000000000000000000000000000000000..5c7ae5eae05ce11d3a24b3202194616933db38da
--- /dev/null
+++ b/Golden_Repo/e/ELPA/ELPA-2021.11.001_fix_hardcoded_perl_path.patch
@@ -0,0 +1,40 @@
+--- elpa-new_release_2021.11.001/test_project_1stage/fdep/fortran_dependencies.pl 2021-12-17 08:20:49.000000000 +0100
++++ elpa-new_release_2021.11.001_ok/test_project_1stage/fdep/fortran_dependencies.pl 2022-01-25 17:07:21.169362000 +0100
+@@ -1,4 +1,4 @@
+-#!/usr/bin/perl -w
++#!/usr/bin/env perl
+ 
+ use strict;
+ 
+--- elpa-new_release_2021.11.001/fdep/fortran_dependencies.pl 2021-12-17 08:20:49.000000000 +0100
++++ elpa-new_release_2021.11.001_ok/fdep/fortran_dependencies.pl 2022-01-25 17:08:17.272544000 +0100
+@@ -1,4 +1,4 @@
+-#!/usr/bin/perl -w
++#!/usr/bin/env perl
+ #
+ # Copyright 2015 Lorenz Hüdepohl
+ #
+--- elpa-new_release_2021.11.001/test_project_C_2stage/fdep/fortran_dependencies.pl 2021-12-17 08:20:49.000000000 +0100
++++ elpa-new_release_2021.11.001_ok/test_project_C_2stage/fdep/fortran_dependencies.pl 2022-01-25 17:06:20.088471000 +0100
+@@ -1,4 +1,4 @@
+-#!/usr/bin/perl -w
++#!/usr/bin/env perl
+ 
+ use strict;
+ 
+--- elpa-new_release_2021.11.001/test_project_2stage/fdep/fortran_dependencies.pl 2021-12-17 08:20:49.000000000 +0100
++++ elpa-new_release_2021.11.001_ok/test_project_2stage/fdep/fortran_dependencies.pl 2022-01-25 17:05:10.675886000 +0100
+@@ -1,4 +1,4 @@
+-#!/usr/bin/perl -w
++#!/usr/bin/env perl
+ 
+ use strict;
+ 
+--- elpa-new_release_2021.11.001/test_project_C/fdep/fortran_dependencies.pl 2021-12-17 08:20:49.000000000 +0100
++++ elpa-new_release_2021.11.001_ok/test_project_C/fdep/fortran_dependencies.pl 2022-01-25 17:04:14.834326000 +0100
+@@ -1,4 +1,4 @@
+-#!/usr/bin/perl -w
++#!/usr/bin/env perl
+ 
+ use strict;
+ 
diff --git a/Golden_Repo/e/ELPA/ELPA-2021.11.001_install-libelpatest.patch b/Golden_Repo/e/ELPA/ELPA-2021.11.001_install-libelpatest.patch
new
file mode 100644 index 0000000000000000000000000000000000000000..ad5240130a3f09af760ed386157af998597a8f24 --- /dev/null +++ b/Golden_Repo/e/ELPA/ELPA-2021.11.001_install-libelpatest.patch @@ -0,0 +1,12790 @@ +--- elpa-new_release_2021.11.001/Makefile.am 2021-12-17 08:20:49.000000000 +0100 ++++ elpa-new_release_2021.11.001_ok/Makefile.am 2022-01-26 10:47:31.956245709 +0100 +@@ -665,7 +665,7 @@ + test_program_fcflags = $(AM_FCFLAGS) $(FC_MODOUT)test_modules $(FC_MODINC)test_modules $(FC_MODINC)modules $(FC_MODINC)private_modules + + # library with shared sources for the test files +-noinst_LTLIBRARIES += libelpatest@SUFFIX@.la ++lib_LTLIBRARIES += libelpatest@SUFFIX@.la + libelpatest@SUFFIX@_la_FCFLAGS = $(test_program_fcflags) + libelpatest@SUFFIX@_la_SOURCES = \ + test/shared/tests_variable_definitions.F90 \ +diff -ruN elpa-new_release_2021.11.001/examples/C/Makefile_examples_hybrid elpa-new_release_2021.11.001_ok/examples/C/Makefile_examples_hybrid +--- elpa-new_release_2021.11.001/examples/C/Makefile_examples_hybrid 1970-01-01 01:00:00.000000000 +0100 ++++ elpa-new_release_2021.11.001_ok/examples/C/Makefile_examples_hybrid 2022-01-28 09:53:19.118256000 +0100 +@@ -0,0 +1,31 @@ ++# MPICH, that is IntelMPI or ParaStationMPI ++SCALAPACK_LIB = -lmkl_scalapack_lp64 -lmkl_blacs_intelmpi_lp64 ++# OpenMPI ++# SCALAPACK_LIB = -lmkl_scalapack_lp64 $(MKLROOT)/lib/intel64/libmkl_blacs_openmpi_lp64.a ++LAPACK_LIB = ++# Intel compiler ++MKL = -lmkl_intel_lp64 -lmkl_intel_thread -lmkl_core -liomp5 -lpthread -lstdc++ ++# GCC ++# MKL = -lmkl_gf_lp64 -lmkl_gnu_thread -lmkl_core -lgomp -lpthread -lstdc++ -lm ++F90 = mpif90 -O3 -qopenmp -I$(ELPA_MODULES_OPENMP) -I$(ELPA_INCLUDE_OPENMP) -I$(ELPA_INCLUDE_OPENMP)/elpa -I$(ELPA_INCLUDE) -I$(ELPA_INCLUDE)/elpa ++# GCC ++# F90 = mpif90 -O3 -fopenmp -I$(ELPA_MODULES_OPENMP) -I$(ELPA_INCLUDE_OPENMP) -I$(ELPA_INCLUDE_OPENMP)/elpa -I$(ELPA_INCLUDE) -I$(ELPA_INCLUDE)/elpa ++LIBS = -L$(ELPA_LIB_OPENMP) -lelpa_openmp -lelpatest_openmp $(SCALAPACK_LIB) $(MKL) ++CC = mpicc -O3 -qopenmp ++# GCC ++# CC = mpicc -O3 -fopenmp ++ ++all: test_real_1stage_hybrid test_real_2stage_all_kernels_hybrid test_autotune_hybrid test_multiple_objs_hybrid ++ ++test_real_1stage_hybrid: test.c ++ $(CC) -DCURRENT_API_VERSION=20211125 -DTEST_GPU=0 -DTEST_REAL -DTEST_DOUBLE -DTEST_SOLVER_1STAGE -DWITH_OPENMP_TRADITIONAL -DTEST_EIGENVECTORS -DWITH_MPI -I$(ELPA_INCLUDE_OPENMP) -I$(ELPA_INCLUDE_OPENMP)/elpa -I$(ELPA_INCLUDE) -I$(ELPA_INCLUDE)/elpa -I. -o $@ test.c $(LIBS) ++ ++test_real_2stage_all_kernels_hybrid: test.c ++ $(CC) -DCURRENT_API_VERSION=20211125 -DTEST_GPU=0 -DTEST_GPU=0 -DTEST_REAL -DTEST_DOUBLE -DTEST_SOLVER_2STAGE -DWITH_OPENMP_TRADITIONAL -DTEST_EIGENVECTORS -DTEST_ALL_KERNELS -DWITH_MPI -I$(ELPA_INCLUDE_OPENMP) -I$(ELPA_INCLUDE_OPENMP)/elpa -I$(ELPA_INCLUDE) -I$(ELPA_INCLUDE)/elpa -I. -o $@ test.c $(LIBS) ++ ++test_autotune_hybrid: test_autotune.c ++ $(CC) -DCURRENT_API_VERSION=20211125 -DTEST_GPU=0 -DTEST_REAL -DTEST_DOUBLE -DWITH_MPI -DWITH_OPENMP_TRADITIONAL -I$(ELPA_INCLUDE_OPENMP) -I$(ELPA_INCLUDE_OPENMP)/elpa -I$(ELPA_INCLUDE) -I$(ELPA_INCLUDE)/elpa -I. -o $@ test_autotune.c $(LIBS) ++ ++test_multiple_objs_hybrid: test_multiple_objs.c ++ $(CC) -DCURRENT_API_VERSION=20211125 -DTEST_GPU=0 -DTEST_REAL -DTEST_DOUBLE -DWITH_MPI -DWITH_OPENMP_TRADITIONAL -I$(ELPA_INCLUDE_OPENMP) -I$(ELPA_INCLUDE_OPENMP)/elpa -I$(ELPA_INCLUDE) -I$(ELPA_INCLUDE)/elpa -I. 
-o $@ test_multiple_objs.c $(LIBS) ++ +diff -ruN elpa-new_release_2021.11.001/examples/C/Makefile_examples_pure elpa-new_release_2021.11.001_ok/examples/C/Makefile_examples_pure +--- elpa-new_release_2021.11.001/examples/C/Makefile_examples_pure 1970-01-01 01:00:00.000000000 +0100 ++++ elpa-new_release_2021.11.001_ok/examples/C/Makefile_examples_pure 2022-01-28 09:53:42.223490000 +0100 +@@ -0,0 +1,27 @@ ++# MPICH, that is IntelMPI or ParaStationMPI ++SCALAPACK_LIB = -lmkl_scalapack_lp64 -lmkl_blacs_intelmpi_lp64 ++# OpenMPI ++# SCALAPACK_LIB = -lmkl_scalapack_lp64 $(MKLROOT)/lib/intel64/libmkl_blacs_openmpi_lp64.a ++LAPACK_LIB = ++# Intel compiler ++MKL = -lmkl_intel_lp64 -lmkl_sequential -lmkl_core -liomp5 -lpthread -lstdc++ ++# GCC ++# MKL = -lmkl_gf_lp64 -lmkl_sequential -lmkl_core -lgomp -lpthread -lstdc++ -lm ++F90 = mpif90 -O3 -I$(ELPA_MODULES) -I$(ELPA_INCLUDE) -I$(ELPA_INCLUDE)/elpa ++LIBS = -L$(ELPA_LIB) -lelpa -lelpatest $(SCALAPACK_LIB) $(MKL) ++CC = mpicc -O3 ++ ++all: test_real_1stage test_real_2stage_all_kernels test_autotune test_multiple_objs ++ ++test_real_1stage: test.c ++ $(CC) -DCURRENT_API_VERSION=20211125 -DTEST_GPU=0 -DTEST_REAL -DTEST_DOUBLE -DTEST_SOLVER_1STAGE -DTEST_EIGENVECTORS -DWITH_MPI -I$(ELPA_INCLUDE) -I$(ELPA_INCLUDE)/elpa -I. -o $@ test.c $(LIBS) ++ ++test_real_2stage_all_kernels: test.c ++ $(CC) -DCURRENT_API_VERSION=20211125 -DTEST_GPU=0 -DTEST_GPU=0 -DTEST_REAL -DTEST_DOUBLE -DTEST_SOLVER_2STAGE -DTEST_EIGENVECTORS -DTEST_ALL_KERNELS -DWITH_MPI -I$(ELPA_INCLUDE) -I$(ELPA_INCLUDE)/elpa -I. -o $@ test.c $(LIBS) ++ ++test_autotune: test_autotune.c ++ $(CC) -DCURRENT_API_VERSION=20211125 -DTEST_GPU=0 -DTEST_REAL -DTEST_DOUBLE -DWITH_MPI -I$(ELPA_INCLUDE) -I$(ELPA_INCLUDE)/elpa -I. -o $@ test_autotune.c $(LIBS) ++ ++test_multiple_objs: test_multiple_objs.c ++ $(CC) -DCURRENT_API_VERSION=20211125 -DTEST_GPU=0 -DTEST_REAL -DTEST_DOUBLE -DWITH_MPI -I$(ELPA_INCLUDE) -I$(ELPA_INCLUDE)/elpa -I. -o $@ test_multiple_objs.c $(LIBS) ++ +diff -ruN elpa-new_release_2021.11.001/examples/C/Makefile_examples_pure_cuda elpa-new_release_2021.11.001_ok/examples/C/Makefile_examples_pure_cuda +--- elpa-new_release_2021.11.001/examples/C/Makefile_examples_pure_cuda 1970-01-01 01:00:00.000000000 +0100 ++++ elpa-new_release_2021.11.001_ok/examples/C/Makefile_examples_pure_cuda 2022-01-28 09:53:55.785592000 +0100 +@@ -0,0 +1,27 @@ ++# MPICH, that is IntelMPI or ParaStationMPI ++SCALAPACK_LIB = -lmkl_scalapack_lp64 -lmkl_blacs_intelmpi_lp64 ++# OpenMPI ++# SCALAPACK_LIB = -lmkl_scalapack_lp64 $(MKLROOT)/lib/intel64/libmkl_blacs_openmpi_lp64.a ++LAPACK_LIB = ++# Intel compiler ++MKL = -lmkl_intel_lp64 -lmkl_sequential -lmkl_core -liomp5 -lpthread -lstdc++ ++# GCC ++# MKL = -lmkl_gf_lp64 -lmkl_sequential -lmkl_core -lgomp -lpthread -lstdc++ -lm ++F90 = mpif90 -O3 -I$(ELPA_MODULES) -I$(ELPA_INCLUDE) -I$(ELPA_INCLUDE)/elpa ++LIBS = -L$(ELPA_LIB) -lelpa -lelpatest $(SCALAPACK_LIB) $(MKL) -lcublas -lcudart ++CC = mpicc -O3 ++ ++all: test_real_1stage test_real_2stage_all_kernels test_autotune test_multiple_objs ++ ++test_real_1stage: test.c ++ $(CC) -DCURRENT_API_VERSION=20211125 -DTEST_NVIDIA_GPU=1 -DTEST_REAL -DTEST_DOUBLE -DTEST_SOLVER_1STAGE -DTEST_EIGENVECTORS -DWITH_MPI -I$(ELPA_INCLUDE) -I$(ELPA_INCLUDE)/elpa -I. -o $@ test.c $(LIBS) ++ ++test_real_2stage_all_kernels: test.c ++ $(CC) -DCURRENT_API_VERSION=20211125 -DTEST_NVIDIA_GPU=1 -DTEST_REAL -DTEST_DOUBLE -DTEST_SOLVER_2STAGE -DTEST_EIGENVECTORS -DTEST_ALL_KERNELS -DWITH_MPI -I$(ELPA_INCLUDE) -I$(ELPA_INCLUDE)/elpa -I. 
-o $@ test.c $(LIBS) ++ ++test_autotune: test_autotune.c ++ $(CC) -DCURRENT_API_VERSION=20211125 -DTEST_NVIDIA_GPU=1 -DTEST_REAL -DTEST_DOUBLE -DWITH_MPI -I$(ELPA_INCLUDE) -I$(ELPA_INCLUDE)/elpa -I. -o $@ test_autotune.c $(LIBS) ++ ++test_multiple_objs: test_multiple_objs.c ++ $(CC) -DCURRENT_API_VERSION=20211125 -DTEST_NVIDIA_GPU=1 -DTEST_REAL -DTEST_DOUBLE -DWITH_MPI -I$(ELPA_INCLUDE) -I$(ELPA_INCLUDE)/elpa -I. -o $@ test_multiple_objs.c $(LIBS) ++ +diff -ruN elpa-new_release_2021.11.001/examples/C/test_autotune.c elpa-new_release_2021.11.001_ok/examples/C/test_autotune.c +--- elpa-new_release_2021.11.001/examples/C/test_autotune.c 1970-01-01 01:00:00.000000000 +0100 ++++ elpa-new_release_2021.11.001_ok/examples/C/test_autotune.c 2022-02-01 18:20:00.273429184 +0100 +@@ -0,0 +1,342 @@ ++/* This file is part of ELPA. ++ ++ The ELPA library was originally created by the ELPA consortium, ++ consisting of the following organizations: ++ ++ - Max Planck Computing and Data Facility (MPCDF), formerly known as ++ Rechenzentrum Garching der Max-Planck-Gesellschaft (RZG), ++ - Bergische Universität Wuppertal, Lehrstuhl für angewandte ++ Informatik, ++ - Technische Universität München, Lehrstuhl für Informatik mit ++ Schwerpunkt Wissenschaftliches Rechnen , ++ - Fritz-Haber-Institut, Berlin, Abt. Theorie, ++ - Max-Plack-Institut für Mathematik in den Naturwissenschaften, ++ Leipzig, Abt. Komplexe Strukutren in Biologie und Kognition, ++ and ++ - IBM Deutschland GmbH ++ ++ ++ More information can be found here: ++ http://elpa.mpcdf.mpg.de/ ++ ++ ELPA is free software: you can redistribute it and/or modify ++ it under the terms of the version 3 of the license of the ++ GNU Lesser General Public License as published by the Free ++ Software Foundation. ++ ++ ELPA is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ GNU Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public License ++ along with ELPA. If not, see <http://www.gnu.org/licenses/> ++ ++ ELPA reflects a substantial effort on the part of the original ++ ELPA consortium, and we ask you to respect the spirit of the ++ license that we chose: i.e., please contribute any changes you ++ may have back to the original ELPA library distribution, and keep ++ any derivatives of ELPA under the same license that we chose for ++ the original distribution, the GNU Lesser General Public License. 
++*/ ++ ++#include "config.h" ++ ++#include <string.h> ++#include <stdio.h> ++#include <stdlib.h> ++#ifdef WITH_MPI ++#include <mpi.h> ++#endif ++#include <math.h> ++ ++#include <elpa/elpa.h> ++#include <assert.h> ++ ++#if !(defined(TEST_REAL) ^ defined(TEST_COMPLEX)) ++//#error "define exactly one of TEST_REAL or TEST_COMPLEX" ++#endif ++ ++#if !(defined(TEST_SINGLE) ^ defined(TEST_DOUBLE)) ++//#error "define exactly one of TEST_SINGLE or TEST_DOUBLE" ++#endif ++ ++#if !(defined(TEST_SOLVER_1STAGE) ^ defined(TEST_SOLVER_2STAGE)) ++//#error "define exactly one of TEST_SOLVER_1STAGE or TEST_SOLVER_2STAGE" ++#endif ++ ++#ifdef TEST_SINGLE ++# define EV_TYPE float ++# ifdef TEST_REAL ++# define MATRIX_TYPE float ++# else ++# define MATRIX_TYPE complex float ++# endif ++#else ++# define EV_TYPE double ++# ifdef TEST_REAL ++# define MATRIX_TYPE double ++# else ++# define MATRIX_TYPE complex double ++# endif ++#endif ++ ++#define assert_elpa_ok(x) assert(x == ELPA_OK) ++ ++#ifdef HAVE_64BIT_INTEGER_SUPPORT ++#define TEST_C_INT_TYPE_PTR long int* ++#define C_INT_TYPE_PTR long int* ++#define TEST_C_INT_TYPE long int ++#define C_INT_TYPE long int ++#else ++#define TEST_C_INT_TYPE_PTR int* ++#define C_INT_TYPE_PTR int* ++#define TEST_C_INT_TYPE int ++#define C_INT_TYPE int ++#endif ++ ++#include "generated.h" ++ ++int main(int argc, char** argv) { ++ /* matrix dimensions */ ++ C_INT_TYPE na, nev, nblk; ++ ++ /* mpi */ ++ C_INT_TYPE myid, nprocs; ++ C_INT_TYPE na_cols, na_rows; ++ C_INT_TYPE np_cols, np_rows; ++ C_INT_TYPE my_prow, my_pcol; ++ C_INT_TYPE mpi_comm; ++ ++ /* blacs */ ++ C_INT_TYPE my_blacs_ctxt, sc_desc[9], info; ++ ++ /* The Matrix */ ++ MATRIX_TYPE *a, *as, *z; ++ EV_TYPE *ev; ++ ++ C_INT_TYPE status; ++ int error_elpa; ++ elpa_t handle; ++ ++ elpa_autotune_t autotune_handle; ++ C_INT_TYPE i, unfinished; ++ ++ C_INT_TYPE value; ++#ifdef WITH_MPI ++ MPI_Init_thread(&argc, &argv, MPI_THREAD_SERIALIZED, &info); ++ MPI_Comm_size(MPI_COMM_WORLD, &nprocs); ++ MPI_Comm_rank(MPI_COMM_WORLD, &myid); ++#else ++ nprocs = 1; ++ myid = 0; ++#endif ++ ++ if (argc == 4) { ++ na = atoi(argv[1]); ++ nev = atoi(argv[2]); ++ nblk = atoi(argv[3]); ++ } else { ++ na = 500; ++ nev = 250; ++ nblk = 16; ++ } ++ ++ for (np_cols = (C_INT_TYPE) sqrt((double) nprocs); np_cols > 1; np_cols--) { ++ if (nprocs % np_cols == 0) { ++ break; ++ } ++ } ++ ++ np_rows = nprocs/np_cols; ++ ++ /* set up blacs */ ++ /* convert communicators before */ ++#ifdef WITH_MPI ++ mpi_comm = MPI_Comm_c2f(MPI_COMM_WORLD); ++#else ++ mpi_comm = 0; ++#endif ++ set_up_blacsgrid_f(mpi_comm, np_rows, np_cols, 'C', &my_blacs_ctxt, &my_prow, &my_pcol); ++ set_up_blacs_descriptor_f(na, nblk, my_prow, my_pcol, np_rows, np_cols, &na_rows, &na_cols, sc_desc, my_blacs_ctxt, &info); ++ ++ /* allocate the matrices needed for elpa */ ++ a = calloc(na_rows*na_cols, sizeof(MATRIX_TYPE)); ++ z = calloc(na_rows*na_cols, sizeof(MATRIX_TYPE)); ++ as = calloc(na_rows*na_cols, sizeof(MATRIX_TYPE)); ++ ev = calloc(na, sizeof(EV_TYPE)); ++ ++#ifdef TEST_REAL ++#ifdef TEST_DOUBLE ++ prepare_matrix_random_real_double_f(na, myid, na_rows, na_cols, sc_desc, a, z, as); ++#else ++ prepare_matrix_random_real_single_f(na, myid, na_rows, na_cols, sc_desc, a, z, as); ++#endif ++#else ++#ifdef TEST_DOUBLE ++ prepare_matrix_random_complex_double_f(na, myid, na_rows, na_cols, sc_desc, a, z, as); ++#else ++ prepare_matrix_random_complex_single_f(na, myid, na_rows, na_cols, sc_desc, a, z, as); ++#endif ++#endif ++ ++ if (elpa_init(CURRENT_API_VERSION) != ELPA_OK) { ++ 
fprintf(stderr, "Error: ELPA API version not supported"); ++ exit(1); ++ } ++ ++#if OPTIONAL_C_ERROR_ARGUMENT == 1 ++ handle = elpa_allocate(); ++#else ++ handle = elpa_allocate(&error_elpa); ++ assert_elpa_ok(error_elpa); ++#endif ++ assert_elpa_ok(error_elpa); ++ ++ /* Set parameters */ ++ elpa_set(handle, "na", (int) na, &error_elpa); ++ assert_elpa_ok(error_elpa); ++ ++ elpa_set(handle, "nev", (int) nev, &error_elpa); ++ assert_elpa_ok(error_elpa); ++ ++ if (myid == 0) { ++ printf("Setting the matrix parameters na=%d, nev=%d \n",na,nev); ++ } ++ elpa_set(handle, "local_nrows", (int) na_rows, &error_elpa); ++ assert_elpa_ok(error_elpa); ++ ++ elpa_set(handle, "local_ncols", (int) na_cols, &error_elpa); ++ assert_elpa_ok(error_elpa); ++ ++ elpa_set(handle, "nblk", (int) nblk, &error_elpa); ++ assert_elpa_ok(error_elpa); ++ ++#ifdef WITH_MPI ++ elpa_set(handle, "mpi_comm_parent", (int) (MPI_Comm_c2f(MPI_COMM_WORLD)), &error_elpa); ++ assert_elpa_ok(error_elpa); ++ ++ elpa_set(handle, "process_row", (int) my_prow, &error_elpa); ++ assert_elpa_ok(error_elpa); ++ ++ elpa_set(handle, "process_col", (int) my_pcol, &error_elpa); ++ assert_elpa_ok(error_elpa); ++#endif ++ ++ /* Setup */ ++ assert_elpa_ok(elpa_setup(handle)); ++ ++#if TEST_NVIDIA_GPU == 1 ++ elpa_set(handle, "nvidia-gpu", 0, &error_elpa); ++ assert_elpa_ok(error_elpa); ++#endif ++ ++#if TEST_INTEL_GPU == 1 ++ elpa_set(handle, "intel-gpu", 0, &error_elpa); ++ assert_elpa_ok(error_elpa); ++#endif ++ ++ autotune_handle = elpa_autotune_setup(handle, ELPA_AUTOTUNE_FAST, ELPA_AUTOTUNE_DOMAIN_REAL, &error_elpa); ++ assert_elpa_ok(error_elpa); ++ /* mimic 20 scf steps */ ++ ++ for (i=0; i < 20; i++) { ++ ++ unfinished = elpa_autotune_step(handle, autotune_handle, &error_elpa); ++ ++ if (unfinished == 0) { ++ if (myid == 0) { ++ printf("ELPA autotuning finished in the %d th scf step \n",i); ++ } ++ break; ++ } ++ if (myid == 0) { ++ printf("The current setting of the ELPA object: \n"); ++ elpa_print_settings(handle, &error_elpa); ++ ++ printf("The state of the autotuning: \n"); ++ elpa_autotune_print_state(handle, autotune_handle, &error_elpa); ++ } ++ ++ ++ /* Solve EV problem */ ++ elpa_eigenvectors(handle, a, ev, z, &error_elpa); ++ assert_elpa_ok(error_elpa); ++ ++ /* check the results */ ++#ifdef TEST_REAL ++#ifdef TEST_DOUBLE ++ status = check_correctness_evp_numeric_residuals_real_double_f(na, nev, na_rows, na_cols, as, z, ev, ++ sc_desc, nblk, myid, np_rows, np_cols, my_prow, my_pcol); ++ memcpy(a, as, na_rows*na_cols*sizeof(double)); ++ ++#else ++ status = check_correctness_evp_numeric_residuals_real_single_f(na, nev, na_rows, na_cols, as, z, ev, ++ sc_desc, nblk, myid, np_rows, np_cols, my_prow, my_pcol); ++ memcpy(a, as, na_rows*na_cols*sizeof(float)); ++#endif ++#else ++#ifdef TEST_DOUBLE ++ status = check_correctness_evp_numeric_residuals_complex_double_f(na, nev, na_rows, na_cols, as, z, ev, ++ sc_desc, nblk, myid, np_rows, np_cols, my_prow, my_pcol); ++ memcpy(a, as, na_rows*na_cols*sizeof(complex double)); ++#else ++ status = check_correctness_evp_numeric_residuals_complex_single_f(na, nev, na_rows, na_cols, as, z, ev, ++ sc_desc, nblk, myid, np_rows, np_cols, my_prow, my_pcol); ++ memcpy(a, as, na_rows*na_cols*sizeof(complex float)); ++#endif ++#endif ++ ++ if (status !=0){ ++ printf("The computed EVs are not correct !\n"); ++ break; ++ } ++ printf("hier %d \n",myid); ++ } ++ ++ if (unfinished == 1) { ++ if (myid == 0) { ++ printf("ELPA autotuning did not finished during %d scf cycles\n",i); ++ ++ } ++ ++ } ++ 
elpa_autotune_set_best(handle, autotune_handle, &error_elpa); ++ ++ if (myid == 0) { ++ printf("The best combination found by the autotuning:\n"); ++ elpa_autotune_print_best(handle, autotune_handle, &error_elpa); ++ } ++ ++#if OPTIONAL_C_ERROR_ARGUMENT == 1 ++ elpa_autotune_deallocate(autotune_handle); ++ elpa_deallocate(handle); ++#else ++ elpa_autotune_deallocate(autotune_handle, &error_elpa); ++ elpa_deallocate(handle, &error_elpa); ++#endif ++ elpa_uninit(&error_elpa); ++ ++ if (myid == 0) { ++ printf("\n"); ++ printf("2stage ELPA real solver complete\n"); ++ printf("\n"); ++ } ++ ++ if (status ==0){ ++ if (myid ==0) { ++ printf("All ok!\n"); ++ } ++ } ++ ++ free(a); ++ free(z); ++ free(as); ++ free(ev); ++ ++#ifdef WITH_MPI ++ MPI_Finalize(); ++#endif ++ ++ return !!status; ++} +diff -ruN elpa-new_release_2021.11.001/examples/C/test.c elpa-new_release_2021.11.001_ok/examples/C/test.c +--- elpa-new_release_2021.11.001/examples/C/test.c 1970-01-01 01:00:00.000000000 +0100 ++++ elpa-new_release_2021.11.001_ok/examples/C/test.c 2022-01-28 09:45:19.528434910 +0100 +@@ -0,0 +1,359 @@ ++/* This file is part of ELPA. ++ ++ The ELPA library was originally created by the ELPA consortium, ++ consisting of the following organizations: ++ ++ - Max Planck Computing and Data Facility (MPCDF), formerly known as ++ Rechenzentrum Garching der Max-Planck-Gesellschaft (RZG), ++ - Bergische Universität Wuppertal, Lehrstuhl für angewandte ++ Informatik, ++ - Technische Universität München, Lehrstuhl für Informatik mit ++ Schwerpunkt Wissenschaftliches Rechnen , ++ - Fritz-Haber-Institut, Berlin, Abt. Theorie, ++ - Max-Plack-Institut für Mathematik in den Naturwissenschaften, ++ Leipzig, Abt. Komplexe Strukutren in Biologie und Kognition, ++ and ++ - IBM Deutschland GmbH ++ ++ ++ More information can be found here: ++ http://elpa.mpcdf.mpg.de/ ++ ++ ELPA is free software: you can redistribute it and/or modify ++ it under the terms of the version 3 of the license of the ++ GNU Lesser General Public License as published by the Free ++ Software Foundation. ++ ++ ELPA is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ GNU Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public License ++ along with ELPA. If not, see <http://www.gnu.org/licenses/> ++ ++ ELPA reflects a substantial effort on the part of the original ++ ELPA consortium, and we ask you to respect the spirit of the ++ license that we chose: i.e., please contribute any changes you ++ may have back to the original ELPA library distribution, and keep ++ any derivatives of ELPA under the same license that we chose for ++ the original distribution, the GNU Lesser General Public License. 
++*/ ++ ++#include "config.h" ++ ++#include <stdio.h> ++#include <stdlib.h> ++#include <string.h> ++#ifdef WITH_MPI ++#include <mpi.h> ++#endif ++#include <math.h> ++ ++#include <elpa/elpa.h> ++#include <assert.h> ++ ++#if !(defined(TEST_REAL) ^ defined(TEST_COMPLEX)) ++#error "define exactly one of TEST_REAL or TEST_COMPLEX" ++#endif ++ ++#if !(defined(TEST_SINGLE) ^ defined(TEST_DOUBLE)) ++#error "define exactly one of TEST_SINGLE or TEST_DOUBLE" ++#endif ++ ++#if !(defined(TEST_SOLVER_1STAGE) ^ defined(TEST_SOLVER_2STAGE)) ++#error "define exactly one of TEST_SOLVER_1STAGE or TEST_SOLVER_2STAGE" ++#endif ++ ++#ifdef TEST_GENERALIZED_DECOMP_EIGENPROBLEM ++#define TEST_GENERALIZED_EIGENPROBLEM ++#endif ++ ++#ifdef TEST_SINGLE ++# define EV_TYPE float ++# ifdef TEST_REAL ++# define MATRIX_TYPE float ++# define PREPARE_MATRIX_RANDOM prepare_matrix_random_real_single_f ++# define PREPARE_MATRIX_RANDOM_SPD prepare_matrix_random_spd_real_single_f ++# define CHECK_CORRECTNESS_EVP_NUMERIC_RESIDUALS check_correctness_evp_numeric_residuals_real_single_f ++# define CHECK_CORRECTNESS_EVP_GEN_NUMERIC_RESIDUALS check_correctness_evp_gen_numeric_residuals_real_single_f ++# else ++# define MATRIX_TYPE complex float ++# define PREPARE_MATRIX_RANDOM prepare_matrix_random_complex_single_f ++# define PREPARE_MATRIX_RANDOM_SPD prepare_matrix_random_spd_complex_single_f ++# define CHECK_CORRECTNESS_EVP_NUMERIC_RESIDUALS check_correctness_evp_numeric_residuals_complex_single_f ++# define CHECK_CORRECTNESS_EVP_GEN_NUMERIC_RESIDUALS check_correctness_evp_gen_numeric_residuals_complex_single_f ++# endif ++#else ++# define EV_TYPE double ++# ifdef TEST_REAL ++# define MATRIX_TYPE double ++# define PREPARE_MATRIX_RANDOM prepare_matrix_random_real_double_f ++# define PREPARE_MATRIX_RANDOM_SPD prepare_matrix_random_spd_real_double_f ++# define CHECK_CORRECTNESS_EVP_NUMERIC_RESIDUALS check_correctness_evp_numeric_residuals_real_double_f ++# define CHECK_CORRECTNESS_EVP_GEN_NUMERIC_RESIDUALS check_correctness_evp_gen_numeric_residuals_real_double_f ++# else ++# define MATRIX_TYPE complex double ++# define PREPARE_MATRIX_RANDOM prepare_matrix_random_complex_double_f ++# define PREPARE_MATRIX_RANDOM_SPD prepare_matrix_random_spd_complex_double_f ++# define CHECK_CORRECTNESS_EVP_NUMERIC_RESIDUALS check_correctness_evp_numeric_residuals_complex_double_f ++# define CHECK_CORRECTNESS_EVP_GEN_NUMERIC_RESIDUALS check_correctness_evp_gen_numeric_residuals_complex_double_f ++# endif ++#endif ++ ++#define assert_elpa_ok(x) assert(x == ELPA_OK) ++ ++ ++#ifdef HAVE_64BIT_INTEGER_MATH_SUPPORT ++#define TEST_C_INT_TYPE_PTR long int* ++#define C_INT_TYPE_PTR long int* ++#define TEST_C_INT_TYPE long int ++#define C_INT_TYPE long int ++#else ++#define TEST_C_INT_TYPE_PTR int* ++#define C_INT_TYPE_PTR int* ++#define TEST_C_INT_TYPE int ++#define C_INT_TYPE int ++#endif ++ ++#ifdef HAVE_64BIT_INTEGER_MPI_SUPPORT ++#define TEST_C_INT_MPI_TYPE_PTR long int* ++#define C_INT_MPI_TYPE_PTR long int* ++#define TEST_C_INT_MPI_TYPE long int ++#define C_INT_MPI_TYPE long int ++#else ++#define TEST_C_INT_MPI_TYPE_PTR int* ++#define C_INT_MPI_TYPE_PTR int* ++#define TEST_C_INT_MPI_TYPE int ++#define C_INT_MPI_TYPE int ++#endif ++ ++#define TEST_GPU 0 ++#if (TEST_NVIDIA_GPU == 1) || (TEST_AMD_GPU == 1) || (TEST_INTEL_GPU == 1) ++#undef TEST_GPU ++#define TEST_GPU 1 ++#endif ++ ++ ++#include "generated.h" ++ ++int main(int argc, char** argv) { ++ /* matrix dimensions */ ++ C_INT_TYPE na, nev, nblk; ++ ++ /* mpi */ ++ C_INT_TYPE myid, nprocs; ++ 
C_INT_MPI_TYPE myidMPI, nprocsMPI; ++ C_INT_TYPE na_cols, na_rows; ++ C_INT_TYPE np_cols, np_rows; ++ C_INT_TYPE my_prow, my_pcol; ++ C_INT_TYPE mpi_comm; ++ C_INT_MPI_TYPE provided_mpi_thread_level; ++ ++ /* blacs */ ++ C_INT_TYPE my_blacs_ctxt, sc_desc[9], info; ++ ++ /* The Matrix */ ++ MATRIX_TYPE *a, *as, *z, *b, *bs; ++ EV_TYPE *ev; ++ ++ C_INT_TYPE error, status; ++ int error_elpa; ++ ++ elpa_t handle; ++ ++ int value; ++#ifdef WITH_MPI ++#ifndef WITH_OPENMP_TRADITIONAL ++ MPI_Init(&argc, &argv); ++#else ++ MPI_Init_thread(&argc, &argv, MPI_THREAD_SERIALIZED, &provided_mpi_thread_level); ++ ++ if (provided_mpi_thread_level != MPI_THREAD_SERIALIZED) { ++ fprintf(stderr, "MPI ERROR: MPI_THREAD_SERIALIZED is not provided on this system\n"); ++ MPI_Finalize(); ++ exit(77); ++ } ++#endif ++ ++ MPI_Comm_size(MPI_COMM_WORLD, &nprocsMPI); ++ nprocs = (C_INT_TYPE) nprocsMPI; ++ MPI_Comm_rank(MPI_COMM_WORLD, &myidMPI); ++ myid = (C_INT_TYPE) myidMPI; ++ ++#else ++ nprocs = 1; ++ myid = 0; ++#endif ++ ++ if (argc == 4) { ++ na = atoi(argv[1]); ++ nev = atoi(argv[2]); ++ nblk = atoi(argv[3]); ++ } else { ++ na = 500; ++ nev = 250; ++ nblk = 16; ++ } ++ ++ for (np_cols = (C_INT_TYPE) sqrt((double) nprocs); np_cols > 1; np_cols--) { ++ if (nprocs % np_cols == 0) { ++ break; ++ } ++ } ++ ++ np_rows = nprocs/np_cols; ++ ++ /* set up blacs */ ++ /* convert communicators before */ ++#ifdef WITH_MPI ++ mpi_comm = MPI_Comm_c2f(MPI_COMM_WORLD); ++#else ++ mpi_comm = 0; ++#endif ++ set_up_blacsgrid_f(mpi_comm, np_rows, np_cols, 'C', &my_blacs_ctxt, &my_prow, &my_pcol); ++ set_up_blacs_descriptor_f(na, nblk, my_prow, my_pcol, np_rows, np_cols, &na_rows, &na_cols, sc_desc, my_blacs_ctxt, &info); ++ ++ /* allocate the matrices needed for elpa */ ++ a = calloc(na_rows*na_cols, sizeof(MATRIX_TYPE)); ++ z = calloc(na_rows*na_cols, sizeof(MATRIX_TYPE)); ++ as = calloc(na_rows*na_cols, sizeof(MATRIX_TYPE)); ++ ev = calloc(na, sizeof(EV_TYPE)); ++ ++ PREPARE_MATRIX_RANDOM(na, myid, na_rows, na_cols, sc_desc, a, z, as); ++ ++#if defined(TEST_GENERALIZED_EIGENPROBLEM) ++ b = calloc(na_rows*na_cols, sizeof(MATRIX_TYPE)); ++ bs = calloc(na_rows*na_cols, sizeof(MATRIX_TYPE)); ++ PREPARE_MATRIX_RANDOM_SPD(na, myid, na_rows, na_cols, sc_desc, b, z, bs, nblk, np_rows, np_cols, my_prow, my_pcol); ++#endif ++ ++ if (elpa_init(CURRENT_API_VERSION) != ELPA_OK) { ++ fprintf(stderr, "Error: ELPA API version not supported"); ++ exit(1); ++ } ++ ++ handle = elpa_allocate(&error_elpa); ++ //assert_elpa_ok(error_elpa); ++ ++ /* Set parameters */ ++ elpa_set(handle, "na", (int) na, &error_elpa); ++ assert_elpa_ok(error_elpa); ++ ++ elpa_set(handle, "nev", (int) nev, &error_elpa); ++ assert_elpa_ok(error_elpa); ++ ++ if (myid == 0) { ++ printf("Setting the matrix parameters na=%d, nev=%d \n",na,nev); ++ } ++ elpa_set(handle, "local_nrows", (int) na_rows, &error_elpa); ++ assert_elpa_ok(error_elpa); ++ ++ elpa_set(handle, "local_ncols", (int) na_cols, &error_elpa); ++ assert_elpa_ok(error_elpa); ++ ++ elpa_set(handle, "nblk", (int) nblk, &error_elpa); ++ assert_elpa_ok(error_elpa); ++ ++#ifdef WITH_MPI ++ elpa_set(handle, "mpi_comm_parent", (int) (MPI_Comm_c2f(MPI_COMM_WORLD)), &error_elpa); ++ assert_elpa_ok(error_elpa); ++ ++ elpa_set(handle, "process_row", (int) my_prow, &error_elpa); ++ assert_elpa_ok(error_elpa); ++ ++ elpa_set(handle, "process_col", (int) my_pcol, &error_elpa); ++ assert_elpa_ok(error_elpa); ++#endif ++#ifdef TEST_GENERALIZED_EIGENPROBLEM ++ elpa_set(handle, "blacs_context", (int) my_blacs_ctxt, &error_elpa); 
++ assert_elpa_ok(error_elpa); ++#endif ++ ++ /* Setup */ ++ assert_elpa_ok(elpa_setup(handle)); ++ ++ /* Set tunables */ ++#ifdef TEST_SOLVER_1STAGE ++ elpa_set(handle, "solver", ELPA_SOLVER_1STAGE, &error_elpa); ++#else ++ elpa_set(handle, "solver", ELPA_SOLVER_2STAGE, &error_elpa); ++#endif ++ assert_elpa_ok(error_elpa); ++ ++#if TEST_NVIDIA_GPU == 1 ++ elpa_set(handle, "nvidia-gpu", TEST_GPU, &error_elpa); ++ assert_elpa_ok(error_elpa); ++#endif ++ ++#if TEST_AMD_GPU == 1 ++ elpa_set(handle, "amd-gpu", TEST_GPU, &error_elpa); ++ assert_elpa_ok(error_elpa); ++#endif ++ ++#if TEST_INTEL_GPU == 1 ++ elpa_set(handle, "intel-gpu", TEST_GPU, &error_elpa); ++ assert_elpa_ok(error_elpa); ++#endif ++ ++#if defined(TEST_SOLVE_2STAGE) && defined(TEST_KERNEL) ++# ifdef TEST_COMPLEX ++ elpa_set(handle, "complex_kernel", TEST_KERNEL, &error_elpa); ++# else ++ elpa_set(handle, "real_kernel", TEST_KERNEL, &error_elpa); ++# endif ++ assert_elpa_ok(error_elpa); ++#endif ++ ++ elpa_get(handle, "solver", &value, &error_elpa); ++ if (myid == 0) { ++ printf("Solver is set to %d \n", value); ++ } ++ ++#if defined(TEST_GENERALIZED_EIGENPROBLEM) ++ elpa_generalized_eigenvectors(handle, a, b, ev, z, 0, &error_elpa); ++#if defined(TEST_GENERALIZED_DECOMP_EIGENPROBLEM) ++ //a = as, so that the problem can be solved again ++ memcpy(a, as, na_rows * na_cols * sizeof(MATRIX_TYPE)); ++ elpa_generalized_eigenvectors(handle, a, b, ev, z, 1, &error_elpa); ++#endif ++#else ++ /* Solve EV problem */ ++ elpa_eigenvectors(handle, a, ev, z, &error_elpa); ++#endif ++ assert_elpa_ok(error_elpa); ++ ++ elpa_deallocate(handle, &error_elpa); ++ elpa_uninit(&error_elpa); ++ ++ /* check the results */ ++#if defined(TEST_GENERALIZED_EIGENPROBLEM) ++ status = CHECK_CORRECTNESS_EVP_GEN_NUMERIC_RESIDUALS(na, nev, na_rows, na_cols, as, z, ev, ++ sc_desc, nblk, myid, np_rows, np_cols, my_prow, my_pcol, bs); ++#else ++ status = CHECK_CORRECTNESS_EVP_NUMERIC_RESIDUALS(na, nev, na_rows, na_cols, as, z, ev, ++ sc_desc, nblk, myid, np_rows, np_cols, my_prow, my_pcol); ++#endif ++ ++ if (status !=0){ ++ printf("The computed EVs are not correct !\n"); ++ } ++ if (status ==0){ ++ printf("All ok!\n"); ++ } ++ ++ free(a); ++ free(z); ++ free(as); ++ free(ev); ++#if defined(TEST_GENERALIZED_EIGENPROBLEM) ++ free(b); ++ free(bs); ++#endif ++ ++#ifdef WITH_MPI ++ MPI_Finalize(); ++#endif ++ ++ return !!status; ++} +diff -ruN elpa-new_release_2021.11.001/examples/C/test_multiple_objs.c elpa-new_release_2021.11.001_ok/examples/C/test_multiple_objs.c +--- elpa-new_release_2021.11.001/examples/C/test_multiple_objs.c 1970-01-01 01:00:00.000000000 +0100 ++++ elpa-new_release_2021.11.001_ok/examples/C/test_multiple_objs.c 2022-02-01 18:20:51.698668546 +0100 +@@ -0,0 +1,401 @@ ++/* This file is part of ELPA. ++ ++ The ELPA library was originally created by the ELPA consortium, ++ consisting of the following organizations: ++ ++ - Max Planck Computing and Data Facility (MPCDF), formerly known as ++ Rechenzentrum Garching der Max-Planck-Gesellschaft (RZG), ++ - Bergische Universität Wuppertal, Lehrstuhl für angewandte ++ Informatik, ++ - Technische Universität München, Lehrstuhl für Informatik mit ++ Schwerpunkt Wissenschaftliches Rechnen , ++ - Fritz-Haber-Institut, Berlin, Abt. Theorie, ++ - Max-Plack-Institut für Mathematik in den Naturwissenschaften, ++ Leipzig, Abt. 
Komplexe Strukutren in Biologie und Kognition, ++ and ++ - IBM Deutschland GmbH ++ ++ ++ More information can be found here: ++ http://elpa.mpcdf.mpg.de/ ++ ++ ELPA is free software: you can redistribute it and/or modify ++ it under the terms of the version 3 of the license of the ++ GNU Lesser General Public License as published by the Free ++ Software Foundation. ++ ++ ELPA is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ GNU Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public License ++ along with ELPA. If not, see <http://www.gnu.org/licenses/> ++ ++ ELPA reflects a substantial effort on the part of the original ++ ELPA consortium, and we ask you to respect the spirit of the ++ license that we chose: i.e., please contribute any changes you ++ may have back to the original ELPA library distribution, and keep ++ any derivatives of ELPA under the same license that we chose for ++ the original distribution, the GNU Lesser General Public License. ++*/ ++ ++#include "config.h" ++ ++#include <string.h> ++#include <stdio.h> ++#include <stdlib.h> ++#ifdef WITH_MPI ++#include <mpi.h> ++#endif ++#include <math.h> ++ ++#include <elpa/elpa.h> ++#include <assert.h> ++ ++#if !(defined(TEST_REAL) ^ defined(TEST_COMPLEX)) ++//#error "define exactly one of TEST_REAL or TEST_COMPLEX" ++#endif ++ ++#if !(defined(TEST_SINGLE) ^ defined(TEST_DOUBLE)) ++//#error "define exactly one of TEST_SINGLE or TEST_DOUBLE" ++#endif ++ ++#if !(defined(TEST_SOLVER_1STAGE) ^ defined(TEST_SOLVER_2STAGE)) ++//#error "define exactly one of TEST_SOLVER_1STAGE or TEST_SOLVER_2STAGE" ++#endif ++ ++#ifdef TEST_SINGLE ++# define EV_TYPE float ++# ifdef TEST_REAL ++# define MATRIX_TYPE float ++# else ++# define MATRIX_TYPE complex float ++# endif ++#else ++# define EV_TYPE double ++# ifdef TEST_REAL ++# define MATRIX_TYPE double ++# else ++# define MATRIX_TYPE complex double ++# endif ++#endif ++ ++#define assert_elpa_ok(x) assert(x == ELPA_OK) ++#ifdef HAVE_64BIT_INTEGER_SUPPORT ++#define TEST_C_INT_TYPE_PTR long int* ++#define C_INT_TYPE_PTR long int* ++#define TEST_C_INT_TYPE long int ++#define C_INT_TYPE long int ++#else ++#define TEST_C_INT_TYPE_PTR int* ++#define C_INT_TYPE_PTR int* ++#define TEST_C_INT_TYPE int ++#define C_INT_TYPE int ++#endif ++ ++#include "generated.h" ++void set_basic_parameters(elpa_t *handle, C_INT_TYPE na, C_INT_TYPE nev, C_INT_TYPE na_rows, C_INT_TYPE na_cols, C_INT_TYPE nblk, C_INT_TYPE my_prow, C_INT_TYPE my_pcol){ ++ int error_elpa; ++ elpa_set(*handle, "na", (int) na, &error_elpa); ++ assert_elpa_ok(error_elpa); ++ ++ elpa_set(*handle, "nev", (int) nev, &error_elpa); ++ assert_elpa_ok(error_elpa); ++ ++ elpa_set(*handle, "local_nrows", (int) na_rows, &error_elpa); ++ assert_elpa_ok(error_elpa); ++ ++ elpa_set(*handle, "local_ncols", (int) na_cols, &error_elpa); ++ assert_elpa_ok(error_elpa); ++ ++ elpa_set(*handle, "nblk", (int) nblk, &error_elpa); ++ assert_elpa_ok(error_elpa); ++ ++#ifdef WITH_MPI ++ elpa_set(*handle, "mpi_comm_parent", (int) (MPI_Comm_c2f(MPI_COMM_WORLD)), &error_elpa); ++ assert_elpa_ok(error_elpa); ++ ++ elpa_set(*handle, "process_row", (int) my_prow, &error_elpa); ++ assert_elpa_ok(error_elpa); ++ ++ elpa_set(*handle, "process_col", (int) my_pcol, &error_elpa); ++ assert_elpa_ok(error_elpa); ++#endif ++} ++ ++ ++int main(int argc, char** argv) { ++ /* matrix dimensions */ ++ 
C_INT_TYPE na, nev, nblk; ++ ++ /* mpi */ ++ C_INT_TYPE myid, nprocs; ++ C_INT_TYPE na_cols, na_rows; ++ C_INT_TYPE np_cols, np_rows; ++ C_INT_TYPE my_prow, my_pcol; ++ C_INT_TYPE mpi_comm; ++ ++ /* blacs */ ++ C_INT_TYPE my_blacs_ctxt, sc_desc[9], info; ++ ++ /* The Matrix */ ++ MATRIX_TYPE *a, *as, *z; ++ EV_TYPE *ev; ++ ++ C_INT_TYPE status; ++ int error_elpa; ++ int gpu, timings, debug; ++ char str[400]; ++ ++ elpa_t elpa_handle_1, elpa_handle_2, *elpa_handle_ptr; ++ ++ elpa_autotune_t autotune_handle; ++ C_INT_TYPE i, unfinished; ++ ++ C_INT_TYPE value; ++#ifdef WITH_MPI ++ MPI_Init_thread(&argc, &argv, MPI_THREAD_SERIALIZED, &info); ++ MPI_Comm_size(MPI_COMM_WORLD, &nprocs); ++ MPI_Comm_rank(MPI_COMM_WORLD, &myid); ++#else ++ nprocs = 1; ++ myid = 0; ++#endif ++ ++ if (argc == 4) { ++ na = atoi(argv[1]); ++ nev = atoi(argv[2]); ++ nblk = atoi(argv[3]); ++ } else { ++ na = 500; ++ nev = 250; ++ nblk = 16; ++ } ++ ++ for (np_cols = (C_INT_TYPE) sqrt((double) nprocs); np_cols > 1; np_cols--) { ++ if (nprocs % np_cols == 0) { ++ break; ++ } ++ } ++ ++ np_rows = nprocs/np_cols; ++ ++ /* set up blacs */ ++ /* convert communicators before */ ++#ifdef WITH_MPI ++ mpi_comm = MPI_Comm_c2f(MPI_COMM_WORLD); ++#else ++ mpi_comm = 0; ++#endif ++ set_up_blacsgrid_f(mpi_comm, np_rows, np_cols, 'C', &my_blacs_ctxt, &my_prow, &my_pcol); ++ set_up_blacs_descriptor_f(na, nblk, my_prow, my_pcol, np_rows, np_cols, &na_rows, &na_cols, sc_desc, my_blacs_ctxt, &info); ++ ++ /* allocate the matrices needed for elpa */ ++ a = calloc(na_rows*na_cols, sizeof(MATRIX_TYPE)); ++ z = calloc(na_rows*na_cols, sizeof(MATRIX_TYPE)); ++ as = calloc(na_rows*na_cols, sizeof(MATRIX_TYPE)); ++ ev = calloc(na, sizeof(EV_TYPE)); ++ ++#ifdef TEST_REAL ++#ifdef TEST_DOUBLE ++ prepare_matrix_random_real_double_f(na, myid, na_rows, na_cols, sc_desc, a, z, as); ++#else ++ prepare_matrix_random_real_single_f(na, myid, na_rows, na_cols, sc_desc, a, z, as); ++#endif ++#else ++#ifdef TEST_DOUBLE ++ prepare_matrix_random_complex_double_f(na, myid, na_rows, na_cols, sc_desc, a, z, as); ++#else ++ prepare_matrix_random_complex_single_f(na, myid, na_rows, na_cols, sc_desc, a, z, as); ++#endif ++#endif ++ ++ if (elpa_init(CURRENT_API_VERSION) != ELPA_OK) { ++ fprintf(stderr, "Error: ELPA API version not supported"); ++ exit(1); ++ } ++ ++ elpa_handle_1 = elpa_allocate(&error_elpa); ++ assert_elpa_ok(error_elpa); ++ ++ set_basic_parameters(&elpa_handle_1, na, nev, na_rows, na_cols, nblk, my_prow, my_pcol); ++ /* Setup */ ++ assert_elpa_ok(elpa_setup(elpa_handle_1)); ++ ++#if TEST_NVIDIA_GPU == 1 ++ elpa_set(elpa_handle_1, "nvidia-gpu", 0, &error_elpa); ++ assert_elpa_ok(error_elpa); ++#endif ++ ++#if TEST_INTEL_GPU == 1 ++ elpa_set(elpa_handle_1, "intel-gpu", 0, &error_elpa); ++ assert_elpa_ok(error_elpa); ++#endif ++ ++ elpa_set(elpa_handle_1, "timings", 1, &error_elpa); ++ assert_elpa_ok(error_elpa); ++ ++ elpa_set(elpa_handle_1, "debug", 1, &error_elpa); ++ assert_elpa_ok(error_elpa); ++ ++ elpa_store_settings(elpa_handle_1, "initial_parameters.txt", &error_elpa); ++ assert_elpa_ok(error_elpa); ++ ++#ifdef WITH_MPI ++ // barrier after store settings, file created from one MPI rank only, but loaded everywhere ++ MPI_Barrier(MPI_COMM_WORLD); ++#endif ++ ++#if OPTIONAL_C_ERROR_ARGUMENT == 1 ++ elpa_handle_2 = elpa_allocate(); ++#else ++ elpa_handle_2 = elpa_allocate(&error_elpa); ++ assert_elpa_ok(error_elpa); ++#endif ++ ++ set_basic_parameters(&elpa_handle_2, na, nev, na_rows, na_cols, nblk, my_prow, my_pcol); ++ /* Setup */ ++ 
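/* As with elpa_handle_1 above, elpa_setup() has to be called once all ++ mandatory parameters (na, nev, the local sizes, nblk and, under MPI, ++ the parent communicator and grid coordinates) are registered; runtime ++ tunables such as "timings" and "debug" are set or queried afterwards. */ ++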
assert_elpa_ok(elpa_setup(elpa_handle_2)); ++ ++ gpu = 0; /* only the TEST_*_GPU branches below write "gpu"; initialize it so the consistency check further down never reads an indeterminate value */ ++ ++ elpa_load_settings(elpa_handle_2, "initial_parameters.txt", &error_elpa); ++ assert_elpa_ok(error_elpa); ++ ++#if TEST_NVIDIA_GPU == 1 ++ elpa_get(elpa_handle_2, "nvidia-gpu", &gpu, &error_elpa); ++ assert_elpa_ok(error_elpa); ++#endif ++ ++#if TEST_INTEL_GPU == 1 ++ elpa_get(elpa_handle_2, "intel-gpu", &gpu, &error_elpa); ++ assert_elpa_ok(error_elpa); ++#endif ++ ++ elpa_get(elpa_handle_2, "timings", &timings, &error_elpa); ++ assert_elpa_ok(error_elpa); ++ ++ elpa_get(elpa_handle_2, "debug", &debug, &error_elpa); ++ assert_elpa_ok(error_elpa); ++ ++ if ((timings != 1) || (debug != 1) || (gpu != 0)){ ++ printf("Parameters not stored or loaded correctly. Aborting... %d, %d, %d\n", timings, debug, gpu); ++ exit(1); ++ } ++ ++ elpa_handle_ptr = &elpa_handle_2; ++ ++ autotune_handle = elpa_autotune_setup(*elpa_handle_ptr, ELPA_AUTOTUNE_FAST, ELPA_AUTOTUNE_DOMAIN_REAL, &error_elpa); ++ assert_elpa_ok(error_elpa); ++ /* mimic 20 scf steps */ ++ ++ for (i=0; i < 20; i++) { ++ ++ unfinished = elpa_autotune_step(*elpa_handle_ptr, autotune_handle, &error_elpa); ++ ++ if (unfinished == 0) { ++ if (myid == 0) { ++ printf("ELPA autotuning finished in the %d-th scf step\n", i); ++ } ++ break; ++ } ++ ++ elpa_print_settings(*elpa_handle_ptr, &error_elpa); ++ elpa_autotune_print_state(*elpa_handle_ptr, autotune_handle, &error_elpa); ++ ++ sprintf(str, "saved_parameters_%d.txt", i); ++ elpa_store_settings(*elpa_handle_ptr, str, &error_elpa); ++ assert_elpa_ok(error_elpa); ++ ++ /* Solve EV problem */ ++ elpa_eigenvectors(*elpa_handle_ptr, a, ev, z, &error_elpa); ++ assert_elpa_ok(error_elpa); ++ ++ /* check the results */ ++#ifdef TEST_REAL ++#ifdef TEST_DOUBLE ++ status = check_correctness_evp_numeric_residuals_real_double_f(na, nev, na_rows, na_cols, as, z, ev, ++ sc_desc, nblk, myid, np_rows, np_cols, my_prow, my_pcol); ++ memcpy(a, as, na_rows*na_cols*sizeof(double)); ++ ++#else ++ status = check_correctness_evp_numeric_residuals_real_single_f(na, nev, na_rows, na_cols, as, z, ev, ++ sc_desc, nblk, myid, np_rows, np_cols, my_prow, my_pcol); ++ memcpy(a, as, na_rows*na_cols*sizeof(float)); ++#endif ++#else ++#ifdef TEST_DOUBLE ++ status = check_correctness_evp_numeric_residuals_complex_double_f(na, nev, na_rows, na_cols, as, z, ev, ++ sc_desc, nblk, myid, np_rows, np_cols, my_prow, my_pcol); ++ memcpy(a, as, na_rows*na_cols*sizeof(complex double)); ++#else ++ status = check_correctness_evp_numeric_residuals_complex_single_f(na, nev, na_rows, na_cols, as, z, ev, ++ sc_desc, nblk, myid, np_rows, np_cols, my_prow, my_pcol); ++ memcpy(a, as, na_rows*na_cols*sizeof(complex float)); ++#endif ++#endif ++ ++ if (status !=0){ ++ printf("The computed EVs are not correct!\n"); ++ break; ++ } ++ ++ elpa_autotune_print_state(*elpa_handle_ptr, autotune_handle, &error_elpa); ++ assert_elpa_ok(error_elpa); ++ ++ sprintf(str, "saved_state_%d.txt", i); ++ elpa_autotune_save_state(*elpa_handle_ptr, autotune_handle, str, &error_elpa); ++ assert_elpa_ok(error_elpa); ++ ++#ifdef WITH_MPI ++ // barrier after save state, file created from one MPI rank only, but loaded everywhere ++ MPI_Barrier(MPI_COMM_WORLD); ++#endif ++ ++ elpa_autotune_load_state(*elpa_handle_ptr, autotune_handle, str, &error_elpa); ++ assert_elpa_ok(error_elpa); ++ ++ } ++ ++ /* report whether the 20 steps above were enough to finish autotuning */ ++ if (unfinished == 1) { ++ if (myid == 0) { ++ printf("ELPA autotuning did not finish within %d scf cycles\n",i); ++ } ++ } ++ ++ elpa_autotune_set_best(*elpa_handle_ptr, autotune_handle, &error_elpa); ++ ++ if (myid == 0) { ++ printf("The best combination found by
the autotuning:\n"); ++ elpa_autotune_print_best(*elpa_handle_ptr, autotune_handle, &error_elpa); ++ } ++ ++ elpa_autotune_deallocate(autotune_handle, &error_elpa); ++ elpa_deallocate(elpa_handle_1, &error_elpa); ++#if OPTIONAL_C_ERROR_ARGUMENT == 1 ++ elpa_deallocate(elpa_handle_2); ++#else ++ elpa_deallocate(elpa_handle_2, &error_elpa); ++#endif ++ elpa_uninit(&error_elpa); ++ ++ if (myid == 0) { ++ printf("\n"); ++ printf("2stage ELPA real solver complete\n"); ++ printf("\n"); ++ } ++ ++ if (status ==0){ ++ if (myid ==0) { ++ printf("All ok!\n"); ++ } ++ } ++ ++ free(a); ++ free(z); ++ free(as); ++ free(ev); ++ ++#ifdef WITH_MPI ++ MPI_Finalize(); ++#endif ++ ++ return !!status; ++} +diff -ruN elpa-new_release_2021.11.001/examples/Fortran/assert.h elpa-new_release_2021.11.001_ok/examples/Fortran/assert.h +--- elpa-new_release_2021.11.001/examples/Fortran/assert.h 1970-01-01 01:00:00.000000000 +0100 ++++ elpa-new_release_2021.11.001_ok/examples/Fortran/assert.h 2022-01-26 10:05:16.438246000 +0100 +@@ -0,0 +1,7 @@ ++#define stringify_(x) "x" ++#define stringify(x) stringify_(x) ++#define assert(x) call x_a(x, stringify(x), "F", __LINE__) ++ ++#define assert_elpa_ok(error_code) call x_ao(error_code, stringify(error_code), __FILE__, __LINE__) ++ ++! vim: syntax=fortran +diff -ruN elpa-new_release_2021.11.001/examples/Fortran/elpa2/complex_2stage_banded.F90 elpa-new_release_2021.11.001_ok/examples/Fortran/elpa2/complex_2stage_banded.F90 +--- elpa-new_release_2021.11.001/examples/Fortran/elpa2/complex_2stage_banded.F90 1970-01-01 01:00:00.000000000 +0100 ++++ elpa-new_release_2021.11.001_ok/examples/Fortran/elpa2/complex_2stage_banded.F90 2022-01-26 10:09:19.163552000 +0100 +@@ -0,0 +1,300 @@ ++! This file is part of ELPA. ++! ++! The ELPA library was originally created by the ELPA consortium, ++! consisting of the following organizations: ++! ++! - Max Planck Computing and Data Facility (MPCDF), formerly known as ++! Rechenzentrum Garching der Max-Planck-Gesellschaft (RZG), ++! - Bergische Universität Wuppertal, Lehrstuhl für angewandte ++! Informatik, ++! - Technische Universität München, Lehrstuhl für Informatik mit ++! Schwerpunkt Wissenschaftliches Rechnen , ++! - Fritz-Haber-Institut, Berlin, Abt. Theorie, ++! - Max-Plack-Institut für Mathematik in den Naturwissenschaften, ++! Leipzig, Abt. Komplexe Strukutren in Biologie und Kognition, ++! and ++! - IBM Deutschland GmbH ++! ++! ++! More information can be found here: ++! http://elpa.mpcdf.mpg.de/ ++! ++! ELPA is free software: you can redistribute it and/or modify ++! it under the terms of the version 3 of the license of the ++! GNU Lesser General Public License as published by the Free ++! Software Foundation. ++! ++! ELPA is distributed in the hope that it will be useful, ++! but WITHOUT ANY WARRANTY; without even the implied warranty of ++! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++! GNU Lesser General Public License for more details. ++! ++! You should have received a copy of the GNU Lesser General Public License ++! along with ELPA. If not, see <http://www.gnu.org/licenses/> ++! ++! ELPA reflects a substantial effort on the part of the original ++! ELPA consortium, and we ask you to respect the spirit of the ++! license that we chose: i.e., please contribute any changes you ++! may have back to the original ELPA library distribution, and keep ++! any derivatives of ELPA under the same license that we chose for ++! the original distribution, the GNU Lesser General Public License. ++! ++! 
++#include "config-f90.h" ++#include "../assert.h" ++!> ++!> Fortran test programm to demonstrates the use of ++!> ELPA 2 complex case library. ++!> If "HAVE_REDIRECT" was defined at build time ++!> the stdout and stderr output of each MPI task ++!> can be redirected to files if the environment ++!> variable "REDIRECT_ELPA_TEST_OUTPUT" is set ++!> to "true". ++!> ++!> By calling executable [arg1] [arg2] [arg3] [arg4] ++!> one can define the size (arg1), the number of ++!> Eigenvectors to compute (arg2), and the blocking (arg3). ++!> If these values are not set default values (500, 150, 16) ++!> are choosen. ++!> If these values are set the 4th argument can be ++!> "output", which specifies that the EV's are written to ++!> an ascii file. ++!> ++!> The complex ELPA 2 kernel is set as the default kernel. ++!> However, this can be overriden by setting ++!> the environment variable "COMPLEX_ELPA_KERNEL" to an ++!> appropiate value. ++!> ++ ++#include "config-f90.h" ++ ++#ifdef HAVE_64BIT_INTEGER_MATH_SUPPORT ++#define TEST_INT_TYPE integer(kind=c_int64_t) ++#define INT_TYPE c_int64_t ++#else ++#define TEST_INT_TYPE integer(kind=c_int32_t) ++#define INT_TYPE c_int32_t ++#endif ++#ifdef HAVE_64BIT_INTEGER_MPI_SUPPORT ++#define TEST_INT_MPI_TYPE integer(kind=c_int64_t) ++#define INT_MPI_TYPE c_int64_t ++#else ++#define TEST_INT_MPI_TYPE integer(kind=c_int32_t) ++#define INT_MPI_TYPE c_int32_t ++#endif ++ ++program test_complex2_double_banded ++ ++!------------------------------------------------------------------------------- ++! Standard eigenvalue problem - COMPLEX version ++! ++! This program demonstrates the use of the ELPA module ++! together with standard scalapack routines ++! ++! Copyright of the original code rests with the authors inside the ELPA ++! consortium. The copyright of any additional modifications shall rest ++! with their original authors, but shall adhere to the licensing terms ++! distributed along with the original code in the file "COPYING". ++!------------------------------------------------------------------------------- ++ use elpa ++ ++ !use test_util ++ use test_read_input_parameters ++ use test_check_correctness ++ use test_setup_mpi ++ use test_blacs_infrastructure ++ use test_prepare_matrix ++#ifdef HAVE_REDIRECT ++ use test_redirect ++#endif ++ use test_output_type ++ implicit none ++ ++ !------------------------------------------------------------------------------- ++ ! Please set system size parameters below! ++ ! na: System size ++ ! nev: Number of eigenvectors to be calculated ++ ! 
nblk: Blocking factor in block cyclic distribution ++ !------------------------------------------------------------------------------- ++ ++ TEST_INT_TYPE :: nblk ++ TEST_INT_TYPE :: na, nev ++ ++ TEST_INT_TYPE :: np_rows, np_cols, na_rows, na_cols ++ ++ TEST_INT_TYPE :: myid, nprocs, my_prow, my_pcol, mpi_comm_rows, mpi_comm_cols ++ TEST_INT_TYPE :: i, my_blacs_ctxt, sc_desc(9), info, nprow, npcol ++ TEST_INT_MPI_TYPE :: mpierr ++#ifdef WITH_MPI ++ !TEST_INT_TYPE, external :: numroc ++#endif ++ complex(kind=ck8), parameter :: CZERO = (0.0_rk8,0.0_rk8), CONE = (1.0_rk8,0.0_rk8) ++ real(kind=rk8), allocatable :: ev(:) ++ ++ complex(kind=ck8), allocatable :: a(:,:), z(:,:), as(:,:) ++ ++ TEST_INT_TYPE :: STATUS ++#ifdef WITH_OPENMP_TRADITIONAL ++ TEST_INT_TYPE :: omp_get_max_threads, required_mpi_thread_level, provided_mpi_thread_level ++#endif ++ type(output_t) :: write_to_file ++ integer(kind=c_int) :: error_elpa ++ character(len=8) :: task_suffix ++ TEST_INT_TYPE :: j ++ ++ ++ TEST_INT_TYPE :: numberOfDevices ++ TEST_INT_TYPE :: global_row, global_col, local_row, local_col ++ TEST_INT_TYPE :: bandwidth ++ class(elpa_t), pointer :: e ++ ++#define COMPLEXCASE ++#define DOUBLE_PRECISION_COMPLEX 1 ++ ++ call read_input_parameters(na, nev, nblk, write_to_file) ++ ++ if (nblk .eq. 1) then ++ stop 77 ++ endif ++ ++ !------------------------------------------------------------------------------- ++ ! MPI Initialization ++ call setup_mpi(myid, nprocs) ++ ++ STATUS = 0 ++ ++ !------------------------------------------------------------------------------- ++ ! Selection of number of processor rows/columns ++ ! We try to set up the grid square-like, i.e. start the search for possible ++ ! divisors of nprocs with a number next to the square root of nprocs ++ ! and decrement it until a divisor is found. ++ ++ do np_cols = NINT(SQRT(REAL(nprocs))),2,-1 ++ if(mod(nprocs,np_cols) == 0 ) exit ++ enddo ++ ! at the end of the above loop, nprocs is always divisible by np_cols ++ ++ np_rows = nprocs/np_cols ++ ++ if(myid==0) then ++ print * ++ print '(a)','Standard eigenvalue problem - COMPLEX version' ++ print * ++ print '(3(a,i0))','Matrix size=',na,', Number of eigenvectors=',nev,', Block size=',nblk ++ print '(3(a,i0))','Number of processor rows=',np_rows,', cols=',np_cols,', total=',nprocs ++ print * ++ endif ++ ++ !------------------------------------------------------------------------------- ++ ! Set up BLACS context and MPI communicators ++ ! ++ ! The BLACS context is only necessary for using Scalapack. ++ ! ++ ! For ELPA, the MPI communicators along rows/cols are sufficient, ++ ! and the grid setup may be done in an arbitrary way as long as it is ++ ! consistent (i.e. 0<=my_prow<np_rows, 0<=my_pcol<np_cols and every ++ ! process has a unique (my_prow,my_pcol) pair). ++ ++ call set_up_blacsgrid(int(mpi_comm_world,kind=BLAS_KIND), np_rows, np_cols, 'C', & ++ my_blacs_ctxt, my_prow, my_pcol) ++ ++ if (myid==0) then ++ print '(a)','| Past BLACS_Gridinfo.' ++ end if ++ ++ ! Determine the necessary size of the distributed matrices, ++ ! we use the Scalapack tools routine NUMROC for that. ++ ++ call set_up_blacs_descriptor(na ,nblk, my_prow, my_pcol, np_rows, np_cols, & ++ na_rows, na_cols, sc_desc, my_blacs_ctxt, info) ++ ++ if (myid==0) then ++ print '(a)','| Past scalapack descriptor setup.' ++ end if ++ !------------------------------------------------------------------------------- ++ ! 
Allocate matrices and set up a test matrix for the eigenvalue problem ++ ++ allocate(a (na_rows,na_cols)) ++ allocate(z (na_rows,na_cols)) ++ allocate(as(na_rows,na_cols)) ++ ++ allocate(ev(na)) ++ ++ call prepare_matrix_random(na, myid, sc_desc, a, z, as) ++ ++ ! set values outside of the bandwidth to zero ++ bandwidth = nblk ++ ++ do local_row = 1, na_rows ++ global_row = index_l2g( local_row, nblk, my_prow, np_rows ) ++ do local_col = 1, na_cols ++ global_col = index_l2g( local_col, nblk, my_pcol, np_cols ) ++ ++ if (ABS(global_row-global_col) > bandwidth) then ++ a(local_row, local_col) = 0 ++ as(local_row, local_col) = 0 ++ end if ++ end do ++ end do ++ ++ ++ if (elpa_init(CURRENT_API_VERSION) /= ELPA_OK) then ++ print *, "ELPA API version not supported" ++ stop 1 ++ endif ++ ++ e => elpa_allocate(error_elpa) ++ assert_elpa_ok(error_elpa) ++ ++ call e%set("na", int(na,kind=c_int), error_elpa) ++ assert_elpa_ok(error_elpa) ++ call e%set("nev", int(nev,kind=c_int), error_elpa) ++ assert_elpa_ok(error_elpa) ++ call e%set("local_nrows", int(na_rows,kind=c_int), error_elpa) ++ assert_elpa_ok(error_elpa) ++ call e%set("local_ncols", int(na_cols,kind=c_int), error_elpa) ++ assert_elpa_ok(error_elpa) ++ call e%set("nblk", int(nblk,kind=c_int), error_elpa) ++ assert_elpa_ok(error_elpa) ++#ifdef WITH_MPI ++ call e%set("mpi_comm_parent", int(MPI_COMM_WORLD,kind=c_int), error_elpa) ++ assert_elpa_ok(error_elpa) ++ call e%set("process_row", int(my_prow,kind=c_int), error_elpa) ++ assert_elpa_ok(error_elpa) ++ call e%set("process_col", int(my_pcol,kind=c_int), error_elpa) ++ assert_elpa_ok(error_elpa) ++#endif ++ ++ call e%set("bandwidth", int(bandwidth,kind=c_int), error_elpa) ++ assert_elpa_ok(error_elpa) ++ ++ assert(e%setup() .eq. ELPA_OK) ++ ++ call e%set("solver", ELPA_SOLVER_2STAGE, error_elpa) ++ assert_elpa_ok(error_elpa) ++ call e%eigenvectors(a, ev, z, error_elpa) ++ assert_elpa_ok(error_elpa) ++ call elpa_deallocate(e, error_elpa) ++ assert_elpa_ok(error_elpa) ++ ++ call elpa_uninit(error_elpa) ++ assert_elpa_ok(error_elpa) ++ ++ !------------------------------------------------------------------------------- ++ ! Test correctness of result (using plain scalapack routines) ++ status = check_correctness_evp_numeric_residuals(na, nev, as, z, ev, sc_desc, nblk, myid, np_rows, np_cols, my_prow, my_pcol) ++ ++ deallocate(a) ++ deallocate(as) ++ ++ deallocate(z) ++ deallocate(ev) ++ ++#ifdef WITH_MPI ++ call blacs_gridexit(my_blacs_ctxt) ++ call mpi_finalize(mpierr) ++#endif ++ call EXIT(STATUS) ++end ++ ++!------------------------------------------------------------------------------- +diff -ruN elpa-new_release_2021.11.001/examples/Fortran/elpa2/double_instance.F90 elpa-new_release_2021.11.001_ok/examples/Fortran/elpa2/double_instance.F90 +--- elpa-new_release_2021.11.001/examples/Fortran/elpa2/double_instance.F90 1970-01-01 01:00:00.000000000 +0100 ++++ elpa-new_release_2021.11.001_ok/examples/Fortran/elpa2/double_instance.F90 2022-01-26 10:09:19.164133000 +0100 +@@ -0,0 +1,244 @@ ++! This file is part of ELPA. ++! ++! The ELPA library was originally created by the ELPA consortium, ++! consisting of the following organizations: ++! ++! - Max Planck Computing and Data Facility (MPCDF), formerly known as ++! Rechenzentrum Garching der Max-Planck-Gesellschaft (RZG), ++! - Bergische Universität Wuppertal, Lehrstuhl für angewandte ++! Informatik, ++! - Technische Universität München, Lehrstuhl für Informatik mit ++! Schwerpunkt Wissenschaftliches Rechnen , ++! 
- Fritz-Haber-Institut, Berlin, Abt. Theorie, ++! - Max-Plack-Institut für Mathematik in den Naturwissenschaften, ++! Leipzig, Abt. Komplexe Strukutren in Biologie und Kognition, ++! and ++! - IBM Deutschland GmbH ++! ++! ++! More information can be found here: ++! http://elpa.mpcdf.mpg.de/ ++! ++! ELPA is free software: you can redistribute it and/or modify ++! it under the terms of the version 3 of the license of the ++! GNU Lesser General Public License as published by the Free ++! Software Foundation. ++! ++! ELPA is distributed in the hope that it will be useful, ++! but WITHOUT ANY WARRANTY; without even the implied warranty of ++! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++! GNU Lesser General Public License for more details. ++! ++! You should have received a copy of the GNU Lesser General Public License ++! along with ELPA. If not, see <http://www.gnu.org/licenses/> ++! ++! ELPA reflects a substantial effort on the part of the original ++! ELPA consortium, and we ask you to respect the spirit of the ++! license that we chose: i.e., please contribute any changes you ++! may have back to the original ELPA library distribution, and keep ++! any derivatives of ELPA under the same license that we chose for ++! the original distribution, the GNU Lesser General Public License. ++! ++! ++#include "config-f90.h" ++ ++#ifdef HAVE_64BIT_INTEGER_MATH_SUPPORT ++#define TEST_INT_TYPE integer(kind=c_int64_t) ++#define INT_TYPE c_int64_t ++#else ++#define TEST_INT_TYPE integer(kind=c_int32_t) ++#define INT_TYPE c_int32_t ++#endif ++#ifdef HAVE_64BIT_INTEGER_MPI_SUPPORT ++#define TEST_INT_MPI_TYPE integer(kind=c_int64_t) ++#define INT_MPI_TYPE c_int64_t ++#else ++#define TEST_INT_MPI_TYPE integer(kind=c_int32_t) ++#define INT_MPI_TYPE c_int32_t ++#endif ++#include "../assert.h" ++ ++program test_interface ++ use elpa ++ ++ use precision_for_tests ++ !use test_util ++ use test_setup_mpi ++ use test_prepare_matrix ++ use test_read_input_parameters ++ use test_blacs_infrastructure ++ use test_check_correctness ++ implicit none ++ ++ ! matrix dimensions ++ TEST_INT_TYPE :: na, nev, nblk ++ ++ ! mpi ++ TEST_INT_TYPE :: myid, nprocs ++ TEST_INT_TYPE :: na_cols, na_rows ! local matrix size ++ TEST_INT_TYPE :: np_cols, np_rows ! number of MPI processes per column/row ++ TEST_INT_TYPE :: my_prow, my_pcol ! local MPI task position (my_prow, my_pcol) in the grid (0..np_cols -1, 0..np_rows -1) ++ TEST_INT_MPI_TYPE :: mpierr ++ ++ ! blacs ++ TEST_INT_TYPE :: my_blacs_ctxt, sc_desc(9), info, nprow, npcol ++ ++ ! The Matrix ++ real(kind=C_DOUBLE), allocatable :: a1(:,:), as1(:,:) ++ ! eigenvectors ++ real(kind=C_DOUBLE), allocatable :: z1(:,:) ++ ! eigenvalues ++ real(kind=C_DOUBLE), allocatable :: ev1(:) ++ ++ ! The Matrix ++ complex(kind=C_DOUBLE_COMPLEX), allocatable :: a2(:,:), as2(:,:) ++ ! eigenvectors ++ complex(kind=C_DOUBLE_COMPLEX), allocatable :: z2(:,:) ++ ! 
eigenvalues ++ real(kind=C_DOUBLE), allocatable :: ev2(:) ++ TEST_INT_TYPE :: status ++ integer(kind=c_int) :: error_elpa ++ ++ TEST_INT_TYPE :: solver ++ TEST_INT_TYPE :: qr ++ ++ type(output_t) :: write_to_file ++ class(elpa_t), pointer :: e1, e2 ++ ++ call read_input_parameters(na, nev, nblk, write_to_file) ++ call setup_mpi(myid, nprocs) ++ ++ status = 0 ++ ++ do np_cols = NINT(SQRT(REAL(nprocs))),2,-1 ++ if(mod(nprocs,np_cols) == 0 ) exit ++ enddo ++ ++ np_rows = nprocs/np_cols ++ ++ my_prow = mod(myid, np_cols) ++ my_pcol = myid / np_cols ++ ++ call set_up_blacsgrid(int(mpi_comm_world,kind=BLAS_KIND), np_rows, np_cols, 'C', & ++ my_blacs_ctxt, my_prow, my_pcol) ++ ++ call set_up_blacs_descriptor(na, nblk, my_prow, my_pcol, np_rows, np_cols, & ++ na_rows, na_cols, sc_desc, my_blacs_ctxt, info) ++ ++ allocate(a1 (na_rows,na_cols), as1(na_rows,na_cols)) ++ allocate(z1 (na_rows,na_cols)) ++ allocate(ev1(na)) ++ ++ a1(:,:) = 0.0 ++ z1(:,:) = 0.0 ++ ev1(:) = 0.0 ++ ++ call prepare_matrix_random(na, myid, sc_desc, a1, z1, as1) ++ allocate(a2 (na_rows,na_cols), as2(na_rows,na_cols)) ++ allocate(z2 (na_rows,na_cols)) ++ allocate(ev2(na)) ++ ++ a2(:,:) = 0.0 ++ z2(:,:) = 0.0 ++ ev2(:) = 0.0 ++ ++ call prepare_matrix_random(na, myid, sc_desc, a2, z2, as2) ++ ++ if (elpa_init(CURRENT_API_VERSION) /= ELPA_OK) then ++ print *, "ELPA API version not supported" ++ stop 1 ++ endif ++ ++ e1 => elpa_allocate(error_elpa) ++ assert_elpa_ok(error_elpa) ++ ++ call e1%set("na", int(na,kind=c_int), error_elpa) ++ assert_elpa_ok(error_elpa) ++ call e1%set("nev", int(nev,kind=c_int), error_elpa) ++ assert_elpa_ok(error_elpa) ++ call e1%set("local_nrows", int(na_rows,kind=c_int), error_elpa) ++ assert_elpa_ok(error_elpa) ++ call e1%set("local_ncols", int(na_cols,kind=c_int), error_elpa) ++ assert_elpa_ok(error_elpa) ++ call e1%set("nblk", int(nblk,kind=c_int), error_elpa) ++ assert_elpa_ok(error_elpa) ++#ifdef WITH_MPI ++ call e1%set("mpi_comm_parent", int(MPI_COMM_WORLD,kind=c_int), error_elpa) ++ assert_elpa_ok(error_elpa) ++ call e1%set("process_row", int(my_prow,kind=c_int), error_elpa) ++ assert_elpa_ok(error_elpa) ++ call e1%set("process_col", int(my_pcol,kind=c_int), error_elpa) ++ assert_elpa_ok(error_elpa) ++#endif ++ ++ assert(e1%setup() .eq. ELPA_OK) ++ ++ call e1%set("solver", ELPA_SOLVER_2STAGE, error_elpa) ++ assert_elpa_ok(error_elpa) ++ ++ call e1%set("real_kernel", ELPA_2STAGE_REAL_DEFAULT, error_elpa) ++ assert_elpa_ok(error_elpa) ++ ++ ++ e2 => elpa_allocate(error_elpa) ++ assert_elpa_ok(error_elpa) ++ ++ call e2%set("na", int(na,kind=c_int), error_elpa) ++ assert_elpa_ok(error_elpa) ++ call e2%set("nev", int(nev,kind=c_int), error_elpa) ++ assert_elpa_ok(error_elpa) ++ call e2%set("local_nrows", int(na_rows,kind=c_int), error_elpa) ++ assert_elpa_ok(error_elpa) ++ call e2%set("local_ncols", int(na_cols,kind=c_int), error_elpa) ++ assert_elpa_ok(error_elpa) ++ call e2%set("nblk", int(nblk,kind=c_int), error_elpa) ++ assert_elpa_ok(error_elpa) ++#ifdef WITH_MPI ++ call e2%set("mpi_comm_parent", int(MPI_COMM_WORLD,kind=c_int), error_elpa) ++ assert_elpa_ok(error_elpa) ++ call e2%set("process_row", int(my_prow,kind=c_int), error_elpa) ++ assert_elpa_ok(error_elpa) ++ call e2%set("process_col", int(my_pcol,kind=c_int), error_elpa) ++ assert_elpa_ok(error_elpa) ++#endif ++ assert(e2%setup() .eq. 
ELPA_OK) ++ ++ call e2%set("solver", ELPA_SOLVER_1STAGE, error_elpa) ++ assert_elpa_ok(error_elpa) ++ ++ call e1%eigenvectors(a1, ev1, z1, error_elpa) ++ assert_elpa_ok(error_elpa) ++ ++ call elpa_deallocate(e1, error_elpa) ++ assert_elpa_ok(error_elpa) ++ ++ call e2%eigenvectors(a2, ev2, z2, error_elpa) ++ assert_elpa_ok(error_elpa) ++ ++ call elpa_deallocate(e2, error_elpa) ++ assert_elpa_ok(error_elpa) ++ ++ call elpa_uninit(error_elpa) ++ ++ status = check_correctness_evp_numeric_residuals(na, nev, as1, z1, ev1, sc_desc, nblk, myid, np_rows, np_cols, my_prow, my_pcol) ++ ++ deallocate(a1) ++ deallocate(as1) ++ deallocate(z1) ++ deallocate(ev1) ++ ++ status = check_correctness_evp_numeric_residuals(na, nev, as2, z2, ev2, sc_desc, nblk, myid, np_rows, np_cols, my_prow, my_pcol) ++ ++ deallocate(a2) ++ deallocate(as2) ++ deallocate(z2) ++ deallocate(ev2) ++ ++#ifdef WITH_MPI ++ call blacs_gridexit(my_blacs_ctxt) ++ call mpi_finalize(mpierr) ++#endif ++ call EXIT(STATUS) ++ ++ ++end program +diff -ruN elpa-new_release_2021.11.001/examples/Fortran/elpa2/real_2stage_banded.F90 elpa-new_release_2021.11.001_ok/examples/Fortran/elpa2/real_2stage_banded.F90 +--- elpa-new_release_2021.11.001/examples/Fortran/elpa2/real_2stage_banded.F90 1970-01-01 01:00:00.000000000 +0100 ++++ elpa-new_release_2021.11.001_ok/examples/Fortran/elpa2/real_2stage_banded.F90 2022-01-26 10:09:19.164859000 +0100 +@@ -0,0 +1,298 @@ ++! This file is part of ELPA. ++! ++! The ELPA library was originally created by the ELPA consortium, ++! consisting of the following organizations: ++! ++! - Max Planck Computing and Data Facility (MPCDF), formerly known as ++! Rechenzentrum Garching der Max-Planck-Gesellschaft (RZG), ++! - Bergische Universität Wuppertal, Lehrstuhl für angewandte ++! Informatik, ++! - Technische Universität München, Lehrstuhl für Informatik mit ++! Schwerpunkt Wissenschaftliches Rechnen , ++! - Fritz-Haber-Institut, Berlin, Abt. Theorie, ++! - Max-Plack-Institut für Mathematik in den Naturwissenschaften, ++! Leipzig, Abt. Komplexe Strukutren in Biologie und Kognition, ++! and ++! - IBM Deutschland GmbH ++! ++! ++! More information can be found here: ++! http://elpa.mpcdf.mpg.de/ ++! ++! ELPA is free software: you can redistribute it and/or modify ++! it under the terms of the version 3 of the license of the ++! GNU Lesser General Public License as published by the Free ++! Software Foundation. ++! ++! ELPA is distributed in the hope that it will be useful, ++! but WITHOUT ANY WARRANTY; without even the implied warranty of ++! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++! GNU Lesser General Public License for more details. ++! ++! You should have received a copy of the GNU Lesser General Public License ++! along with ELPA. If not, see <http://www.gnu.org/licenses/> ++! ++! ELPA reflects a substantial effort on the part of the original ++! ELPA consortium, and we ask you to respect the spirit of the ++! license that we chose: i.e., please contribute any changes you ++! may have back to the original ELPA library distribution, and keep ++! any derivatives of ELPA under the same license that we chose for ++! the original distribution, the GNU Lesser General Public License. ++! ++! ++#include "config-f90.h" ++#include "../assert.h" ++!> ++!> Fortran test programm to demonstrates the use of ++!> ELPA 2 real case library. 
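++!> This banded variant zeroes every matrix entry with ++!> abs(global_row - global_col) > bandwidth (bandwidth = nblk below) and ++!> registers "bandwidth" with ELPA before e%setup(), so the solver can ++!> take the banded structure into account.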
++!> If "HAVE_REDIRECT" was defined at build time ++!> the stdout and stderr output of each MPI task ++!> can be redirected to files if the environment ++!> variable "REDIRECT_ELPA_TEST_OUTPUT" is set ++!> to "true". ++!> ++!> By calling executable [arg1] [arg2] [arg3] [arg4] ++!> one can define the size (arg1), the number of ++!> Eigenvectors to compute (arg2), and the blocking (arg3). ++!> If these values are not set default values (500, 150, 16) ++!> are choosen. ++!> If these values are set the 4th argument can be ++!> "output", which specifies that the EV's are written to ++!> an ascii file. ++!> ++!> The real ELPA 2 kernel is set as the default kernel. ++!> However, this can be overriden by setting ++!> the environment variable "REAL_ELPA_KERNEL" to an ++!> appropiate value. ++!> ++ ++#include "config-f90.h" ++ ++#ifdef HAVE_64BIT_INTEGER_MATH_SUPPORT ++#define TEST_INT_TYPE integer(kind=c_int64_t) ++#define INT_TYPE c_int64_t ++#else ++#define TEST_INT_TYPE integer(kind=c_int32_t) ++#define INT_TYPE c_int32_t ++#endif ++ ++#ifdef HAVE_64BIT_INTEGER_MPI_SUPPORT ++#define TEST_INT_MPI_TYPE integer(kind=c_int64_t) ++#define INT_MPI_TYPE c_int64_t ++#else ++#define TEST_INT_MPI_TYPE integer(kind=c_int32_t) ++#define INT_MPI_TYPE c_int32_t ++#endif ++ ++program test_real2_double_banded ++ ++!------------------------------------------------------------------------------- ++! Standard eigenvalue problem - REAL version ++! ++! This program demonstrates the use of the ELPA module ++! together with standard scalapack routines ++! ++! Copyright of the original code rests with the authors inside the ELPA ++! consortium. The copyright of any additional modifications shall rest ++! with their original authors, but shall adhere to the licensing terms ++! distributed along with the original code in the file "COPYING". ++! ++!------------------------------------------------------------------------------- ++ use elpa ++ ++ !use test_util ++ use test_read_input_parameters ++ use test_check_correctness ++ use test_setup_mpi ++ use test_blacs_infrastructure ++ use test_prepare_matrix ++#ifdef HAVE_REDIRECT ++ use test_redirect ++#endif ++ use test_output_type ++ implicit none ++ ++ !------------------------------------------------------------------------------- ++ ! Please set system size parameters below! ++ ! na: System size ++ ! nev: Number of eigenvectors to be calculated ++ ! nblk: Blocking factor in block cyclic distribution ++ !------------------------------------------------------------------------------- ++ ++ TEST_INT_TYPE :: nblk ++ TEST_INT_TYPE :: na, nev ++ ++ TEST_INT_TYPE :: np_rows, np_cols, na_rows, na_cols ++ ++ TEST_INT_TYPE :: myid, nprocs, my_prow, my_pcol, mpi_comm_rows, mpi_comm_cols ++ TEST_INT_TYPE :: i, my_blacs_ctxt, sc_desc(9), info, nprow, npcol ++ TEST_INT_MPI_TYPE :: mpierr ++ !TEST_INT_TYPE, external :: numroc ++ ++ real(kind=rk8), allocatable :: a(:,:), z(:,:), as(:,:), ev(:) ++ ++ TEST_INT_TYPE :: STATUS ++#ifdef WITH_OPENMP_TRADITIONAL ++ TEST_INT_TYPE :: omp_get_max_threads, required_mpi_thread_level, provided_mpi_thread_level ++#endif ++ integer(kind=c_int) :: error_elpa ++ TEST_INT_TYPE :: numberOfDevices ++ type(output_t) :: write_to_file ++ character(len=8) :: task_suffix ++ TEST_INT_TYPE :: j ++ TEST_INT_TYPE :: global_row, global_col, local_row, local_col ++ TEST_INT_TYPE :: bandwidth ++ class(elpa_t), pointer :: e ++#define DOUBLE_PRECISION_REAL 1 ++ ++ ++ call read_input_parameters(na, nev, nblk, write_to_file) ++ ++ if (nblk .eq. 
1) then ++ stop 77 ++ endif ++ ++ !------------------------------------------------------------------------------- ++ ! MPI Initialization ++ call setup_mpi(myid, nprocs) ++ ++ STATUS = 0 ++ ++#define REALCASE ++ ++ !------------------------------------------------------------------------------- ++ ! Selection of number of processor rows/columns ++ ! We try to set up the grid square-like, i.e. start the search for possible ++ ! divisors of nprocs with a number next to the square root of nprocs ++ ! and decrement it until a divisor is found. ++ ++ do np_cols = NINT(SQRT(REAL(nprocs))),2,-1 ++ if(mod(nprocs,np_cols) == 0 ) exit ++ enddo ++ ! at the end of the above loop, nprocs is always divisible by np_cols ++ ++ np_rows = nprocs/np_cols ++ ++ if(myid==0) then ++ print * ++ print '(a)','Standard eigenvalue problem - REAL version' ++ print * ++ print '(3(a,i0))','Matrix size=',na,', Number of eigenvectors=',nev,', Block size=',nblk ++ print '(3(a,i0))','Number of processor rows=',np_rows,', cols=',np_cols,', total=',nprocs ++ print * ++ endif ++ ++ !------------------------------------------------------------------------------- ++ ! Set up BLACS context and MPI communicators ++ ! ++ ! The BLACS context is only necessary for using Scalapack. ++ ! ++ ! For ELPA, the MPI communicators along rows/cols are sufficient, ++ ! and the grid setup may be done in an arbitrary way as long as it is ++ ! consistent (i.e. 0<=my_prow<np_rows, 0<=my_pcol<np_cols and every ++ ! process has a unique (my_prow,my_pcol) pair). ++ ++ call set_up_blacsgrid(int(mpi_comm_world,kind=BLAS_KIND), np_rows, np_cols, 'C', & ++ my_blacs_ctxt, my_prow, my_pcol) ++ ++ if (myid==0) then ++ print '(a)','| Past BLACS_Gridinfo.' ++ end if ++ ++ call set_up_blacs_descriptor(na ,nblk, my_prow, my_pcol, np_rows, np_cols, & ++ na_rows, na_cols, sc_desc, my_blacs_ctxt, info) ++ ++ if (myid==0) then ++ print '(a)','| Past scalapack descriptor setup.' ++ end if ++ ++ !------------------------------------------------------------------------------- ++ ! Allocate matrices and set up a test matrix for the eigenvalue problem ++ allocate(a (na_rows,na_cols)) ++ allocate(z (na_rows,na_cols)) ++ allocate(as(na_rows,na_cols)) ++ ++ allocate(ev(na)) ++ ++ call prepare_matrix_random(na, myid, sc_desc, a, z, as) ++ ++ ! 
set values outside of the bandwidth to zero ++ bandwidth = nblk ++ ++ do local_row = 1, na_rows ++ global_row = index_l2g(local_row, nblk, my_prow, np_rows) ++ do local_col = 1, na_cols ++ global_col = index_l2g(local_col, nblk, my_pcol, np_cols) ++ ++ if (ABS(global_row-global_col) > bandwidth) then ++ a(local_row, local_col) = 0.0 ++ as(local_row, local_col) = 0.0 ++ end if ++ end do ++ end do ++ ++ if (elpa_init(CURRENT_API_VERSION) /= ELPA_OK) then ++ print *, "ELPA API version not supported" ++ stop 1 ++ endif ++ e => elpa_allocate(error_elpa) ++ assert_elpa_ok(error_elpa) ++ ++ call e%set("na", int(na,kind=c_int), error_elpa) ++ assert_elpa_ok(error_elpa) ++ call e%set("nev", int(nev,kind=c_int), error_elpa) ++ assert_elpa_ok(error_elpa) ++ call e%set("local_nrows", int(na_rows,kind=c_int), error_elpa) ++ assert_elpa_ok(error_elpa) ++ call e%set("local_ncols", int(na_cols,kind=c_int), error_elpa) ++ assert_elpa_ok(error_elpa) ++ call e%set("nblk", int(nblk,kind=c_int), error_elpa) ++ assert_elpa_ok(error_elpa) ++#ifdef WITH_MPI ++ call e%set("mpi_comm_parent", int(MPI_COMM_WORLD,kind=c_int), error_elpa) ++ assert_elpa_ok(error_elpa) ++ call e%set("process_row", int(my_prow,kind=c_int), error_elpa) ++ assert_elpa_ok(error_elpa) ++ call e%set("process_col", int(my_pcol,kind=c_int), error_elpa) ++ assert_elpa_ok(error_elpa) ++#endif ++ ++ call e%set("bandwidth", int(bandwidth,kind=c_int), error_elpa) ++ assert_elpa_ok(error_elpa) ++ ++ assert(e%setup() .eq. ELPA_OK) ++ ++ call e%set("solver", ELPA_SOLVER_2STAGE, error_elpa) ++ assert_elpa_ok(error_elpa) ++ ++ call e%eigenvectors(a, ev, z, error_elpa) ++ assert_elpa_ok(error_elpa) ++ ++ call elpa_deallocate(e, error_elpa) ++ assert_elpa_ok(error_elpa) ++ ++ call elpa_uninit(error_elpa) ++ assert_elpa_ok(error_elpa) ++ ++ ++ !------------------------------------------------------------------------------- ++ ! Test correctness of result (using plain scalapack routines) ++ ++ ++ status = check_correctness_evp_numeric_residuals(na, nev, as, z, ev, sc_desc, nblk, myid, np_rows, np_cols, my_prow, my_pcol) ++ ++ ++ deallocate(a) ++ deallocate(as) ++ ++ deallocate(z) ++ deallocate(ev) ++ ++#ifdef WITH_MPI ++ call blacs_gridexit(my_blacs_ctxt) ++ call mpi_finalize(mpierr) ++#endif ++ call EXIT(STATUS) ++end ++ ++!------------------------------------------------------------------------------- +diff -ruN elpa-new_release_2021.11.001/examples/Fortran/elpa2/single_complex_2stage_banded.F90 elpa-new_release_2021.11.001_ok/examples/Fortran/elpa2/single_complex_2stage_banded.F90 +--- elpa-new_release_2021.11.001/examples/Fortran/elpa2/single_complex_2stage_banded.F90 1970-01-01 01:00:00.000000000 +0100 ++++ elpa-new_release_2021.11.001_ok/examples/Fortran/elpa2/single_complex_2stage_banded.F90 2022-01-26 10:09:19.166040000 +0100 +@@ -0,0 +1,299 @@ ++! This file is part of ELPA. ++! ++! The ELPA library was originally created by the ELPA consortium, ++! consisting of the following organizations: ++! ++! - Max Planck Computing and Data Facility (MPCDF), formerly known as ++! Rechenzentrum Garching der Max-Planck-Gesellschaft (RZG), ++! - Bergische Universität Wuppertal, Lehrstuhl für angewandte ++! Informatik, ++! - Technische Universität München, Lehrstuhl für Informatik mit ++! Schwerpunkt Wissenschaftliches Rechnen , ++! - Fritz-Haber-Institut, Berlin, Abt. Theorie, ++! - Max-Plack-Institut für Mathematik in den Naturwissenschaften, ++! Leipzig, Abt. Komplexe Strukutren in Biologie und Kognition, ++! and ++! - IBM Deutschland GmbH ++! ++! ++! 
More information can be found here: ++! http://elpa.mpcdf.mpg.de/ ++! ++! ELPA is free software: you can redistribute it and/or modify ++! it under the terms of the version 3 of the license of the ++! GNU Lesser General Public License as published by the Free ++! Software Foundation. ++! ++! ELPA is distributed in the hope that it will be useful, ++! but WITHOUT ANY WARRANTY; without even the implied warranty of ++! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++! GNU Lesser General Public License for more details. ++! ++! You should have received a copy of the GNU Lesser General Public License ++! along with ELPA. If not, see <http://www.gnu.org/licenses/> ++! ++! ELPA reflects a substantial effort on the part of the original ++! ELPA consortium, and we ask you to respect the spirit of the ++! license that we chose: i.e., please contribute any changes you ++! may have back to the original ELPA library distribution, and keep ++! any derivatives of ELPA under the same license that we chose for ++! the original distribution, the GNU Lesser General Public License. ++! ++! ++#include "config-f90.h" ++ ++ ++ ++#ifdef HAVE_64BIT_INTEGER_MATH_SUPPORT ++#define TEST_INT_TYPE integer(kind=c_int64_t) ++#define INT_TYPE c_int64_t ++#else ++#define TEST_INT_TYPE integer(kind=c_int32_t) ++#define INT_TYPE c_int32_t ++#endif ++#ifdef HAVE_64BIT_INTEGER_MPI_SUPPORT ++#define TEST_INT_MPI_TYPE integer(kind=c_int64_t) ++#define INT_MPI_TYPE c_int64_t ++#else ++#define TEST_INT_MPI_TYPE integer(kind=c_int32_t) ++#define INT_MPI_TYPE c_int32_t ++#endif ++ ++#include "../assert.h" ++!> ++!> Fortran test programm to demonstrates the use of ++!> ELPA 2 complex case library. ++!> If "HAVE_REDIRECT" was defined at build time ++!> the stdout and stderr output of each MPI task ++!> can be redirected to files if the environment ++!> variable "REDIRECT_ELPA_TEST_OUTPUT" is set ++!> to "true". ++!> ++!> By calling executable [arg1] [arg2] [arg3] [arg4] ++!> one can define the size (arg1), the number of ++!> Eigenvectors to compute (arg2), and the blocking (arg3). ++!> If these values are not set default values (500, 150, 16) ++!> are choosen. ++!> If these values are set the 4th argument can be ++!> "output", which specifies that the EV's are written to ++!> an ascii file. ++!> ++!> The complex ELPA 2 kernel is set as the default kernel. ++!> However, this can be overriden by setting ++!> the environment variable "COMPLEX_ELPA_KERNEL" to an ++!> appropiate value. ++!> ++program test_complex2_single_banded ++ ++!------------------------------------------------------------------------------- ++! Standard eigenvalue problem - COMPLEX version ++! ++! This program demonstrates the use of the ELPA module ++! together with standard scalapack routines ++! ++! Copyright of the original code rests with the authors inside the ELPA ++! consortium. The copyright of any additional modifications shall rest ++! with their original authors, but shall adhere to the licensing terms ++! distributed along with the original code in the file "COPYING". ++!------------------------------------------------------------------------------- ++ use elpa ++ ++ use test_util ++ use test_read_input_parameters ++ use test_check_correctness ++ use test_setup_mpi ++ use test_blacs_infrastructure ++ use test_prepare_matrix ++#ifdef HAVE_REDIRECT ++ use test_redirect ++#endif ++ ++ use test_output_type ++ implicit none ++ ++ !------------------------------------------------------------------------------- ++ ! 
Please set system size parameters below! ++ ! na: System size ++ ! nev: Number of eigenvectors to be calculated ++ ! nblk: Blocking factor in block cyclic distribution ++ !------------------------------------------------------------------------------- ++ ++ TEST_INT_TYPE :: nblk ++ TEST_INT_TYPE :: na, nev ++ ++ TEST_INT_TYPE :: np_rows, np_cols, na_rows, na_cols ++ ++ TEST_INT_TYPE :: myid, nprocs, my_prow, my_pcol, mpi_comm_rows, mpi_comm_cols ++ TEST_INT_TYPE :: i, my_blacs_ctxt, sc_desc(9), info, nprow, npcol ++ TEST_INT_MPI_TYPE :: mpierr ++#ifdef WITH_MPI ++ !TEST_INT_TYPE, external :: numroc ++#endif ++ complex(kind=ck4), parameter :: CZERO = (0.0_rk4,0.0_rk4), CONE = (1.0_rk4,0.0_rk4) ++ real(kind=rk4), allocatable :: ev(:) ++ ++ complex(kind=ck4), allocatable :: a(:,:), z(:,:), as(:,:) ++ ++ TEST_INT_TYPE :: STATUS ++#ifdef WITH_OPENMP_TRADITIONAL ++ TEST_INT_TYPE :: omp_get_max_threads, required_mpi_thread_level, provided_mpi_thread_level ++#endif ++ type(output_t) :: write_to_file ++ integer(kind=ik) :: error_elpa ++ character(len=8) :: task_suffix ++ TEST_INT_TYPE :: j ++ ++ ++ TEST_INT_TYPE :: global_row, global_col, local_row, local_col ++ TEST_INT_TYPE :: bandwidth ++ class(elpa_t), pointer :: e ++ ++#define COMPLEXCASE ++#define DOUBLE_PRECISION_COMPLEX 1 ++ ++ call read_input_parameters(na, nev, nblk, write_to_file) ++ if (nblk .eq. 1) then ++ stop 77 ++ endif ++ ++ !------------------------------------------------------------------------------- ++ ! MPI Initialization ++ call setup_mpi(myid, nprocs) ++ ++ STATUS = 0 ++ ++ !------------------------------------------------------------------------------- ++ ! Selection of number of processor rows/columns ++ ! We try to set up the grid square-like, i.e. start the search for possible ++ ! divisors of nprocs with a number next to the square root of nprocs ++ ! and decrement it until a divisor is found. ++ ++ do np_cols = NINT(SQRT(REAL(nprocs))),2,-1 ++ if(mod(nprocs,np_cols) == 0 ) exit ++ enddo ++ ! at the end of the above loop, nprocs is always divisible by np_cols ++ ++ np_rows = nprocs/np_cols ++ ++ if(myid==0) then ++ print * ++ print '(a)','Standard eigenvalue problem - COMPLEX version' ++ print * ++ print '(3(a,i0))','Matrix size=',na,', Number of eigenvectors=',nev,', Block size=',nblk ++ print '(3(a,i0))','Number of processor rows=',np_rows,', cols=',np_cols,', total=',nprocs ++ print * ++ endif ++ ++ !------------------------------------------------------------------------------- ++ ! Set up BLACS context and MPI communicators ++ ! ++ ! The BLACS context is only necessary for using Scalapack. ++ ! ++ ! For ELPA, the MPI communicators along rows/cols are sufficient, ++ ! and the grid setup may be done in an arbitrary way as long as it is ++ ! consistent (i.e. 0<=my_prow<np_rows, 0<=my_pcol<np_cols and every ++ ! process has a unique (my_prow,my_pcol) pair). ++ ++ call set_up_blacsgrid(int(mpi_comm_world,kind=BLAS_KIND), np_rows, np_cols, 'C', & ++ my_blacs_ctxt, my_prow, my_pcol) ++ ++ if (myid==0) then ++ print '(a)','| Past BLACS_Gridinfo.' ++ end if ++ ++ ! Determine the necessary size of the distributed matrices, ++ ! we use the Scalapack tools routine NUMROC for that. ++ ++ call set_up_blacs_descriptor(na ,nblk, my_prow, my_pcol, np_rows, np_cols, & ++ na_rows, na_cols, sc_desc, my_blacs_ctxt, info) ++ ++ if (myid==0) then ++ print '(a)','| Past scalapack descriptor setup.' ++ end if ++ !------------------------------------------------------------------------------- ++ ! 
Allocate matrices and set up a test matrix for the eigenvalue problem ++ ++ allocate(a (na_rows,na_cols)) ++ allocate(z (na_rows,na_cols)) ++ allocate(as(na_rows,na_cols)) ++ ++ allocate(ev(na)) ++ ++ call prepare_matrix_random(na, myid, sc_desc, a, z, as) ++ ++ ! set values outside of the bandwidth to zero ++ bandwidth = nblk ++ ++ do local_row = 1, na_rows ++ global_row = index_l2g( local_row, nblk, my_prow, np_rows ) ++ do local_col = 1, na_cols ++ global_col = index_l2g( local_col, nblk, my_pcol, np_cols ) ++ ++ if (ABS(global_row-global_col) > bandwidth) then ++ a(local_row, local_col) = 0 ++ as(local_row, local_col) = 0 ++ end if ++ end do ++ end do ++ ++ if (elpa_init(CURRENT_API_VERSION) /= ELPA_OK) then ++ print *, "ELPA API version not supported" ++ stop 1 ++ endif ++ ++ e => elpa_allocate(error_elpa) ++ assert_elpa_ok(error_elpa) ++ ++ call e%set("na", int(na,kind=c_int), error_elpa) ++ assert_elpa_ok(error_elpa) ++ call e%set("nev", int(nev,kind=c_int), error_elpa) ++ assert_elpa_ok(error_elpa) ++ call e%set("local_nrows", int(na_rows,kind=c_int), error_elpa) ++ assert_elpa_ok(error_elpa) ++ call e%set("local_ncols", int(na_cols,kind=c_int), error_elpa) ++ assert_elpa_ok(error_elpa) ++ call e%set("nblk", int(nblk,kind=c_int), error_elpa) ++ assert_elpa_ok(error_elpa) ++#ifdef WITH_MPI ++ call e%set("mpi_comm_parent", int(MPI_COMM_WORLD,kind=c_int), error_elpa) ++ assert_elpa_ok(error_elpa) ++ call e%set("process_row", int(my_prow,kind=c_int), error_elpa) ++ assert_elpa_ok(error_elpa) ++ call e%set("process_col", int(my_pcol,kind=c_int), error_elpa) ++ assert_elpa_ok(error_elpa) ++#endif ++ ++ call e%set("bandwidth", int(bandwidth,kind=c_int), error_elpa) ++ assert_elpa_ok(error_elpa) ++ ++ assert(e%setup() .eq. ELPA_OK) ++ ++ call e%set("solver", ELPA_SOLVER_2STAGE, error_elpa) ++ assert_elpa_ok(error_elpa) ++ call e%eigenvectors(a, ev, z, error_elpa) ++ assert_elpa_ok(error_elpa) ++ ++ call elpa_deallocate(e, error_elpa) ++ assert_elpa_ok(error_elpa) ++ ++ call elpa_uninit(error_elpa) ++ assert_elpa_ok(error_elpa) ++ ++ !------------------------------------------------------------------------------- ++ ! Test correctness of result (using plain scalapack routines) ++ status = check_correctness_evp_numeric_residuals(na, nev, as, z, ev, sc_desc, nblk, myid, np_rows, np_cols, my_prow, my_pcol) ++ ++ deallocate(a) ++ deallocate(as) ++ ++ deallocate(z) ++ deallocate(ev) ++ ++#ifdef WITH_MPI ++ call blacs_gridexit(my_blacs_ctxt) ++ call mpi_finalize(mpierr) ++#endif ++ call EXIT(STATUS) ++end ++ ++!------------------------------------------------------------------------------- +diff -ruN elpa-new_release_2021.11.001/examples/Fortran/elpa2/single_real_2stage_banded.F90 elpa-new_release_2021.11.001_ok/examples/Fortran/elpa2/single_real_2stage_banded.F90 +--- elpa-new_release_2021.11.001/examples/Fortran/elpa2/single_real_2stage_banded.F90 1970-01-01 01:00:00.000000000 +0100 ++++ elpa-new_release_2021.11.001_ok/examples/Fortran/elpa2/single_real_2stage_banded.F90 2022-01-26 10:09:19.166871000 +0100 +@@ -0,0 +1,290 @@ ++! This file is part of ELPA. ++! ++! The ELPA library was originally created by the ELPA consortium, ++! consisting of the following organizations: ++! ++! - Max Planck Computing and Data Facility (MPCDF), formerly known as ++! Rechenzentrum Garching der Max-Planck-Gesellschaft (RZG), ++! - Bergische Universität Wuppertal, Lehrstuhl für angewandte ++! Informatik, ++! - Technische Universität München, Lehrstuhl für Informatik mit ++! 
Schwerpunkt Wissenschaftliches Rechnen , ++! - Fritz-Haber-Institut, Berlin, Abt. Theorie, ++! - Max-Plack-Institut für Mathematik in den Naturwissenschaften, ++! Leipzig, Abt. Komplexe Strukutren in Biologie und Kognition, ++! and ++! - IBM Deutschland GmbH ++! ++! ++! More information can be found here: ++! http://elpa.mpcdf.mpg.de/ ++! ++! ELPA is free software: you can redistribute it and/or modify ++! it under the terms of the version 3 of the license of the ++! GNU Lesser General Public License as published by the Free ++! Software Foundation. ++! ++! ELPA is distributed in the hope that it will be useful, ++! but WITHOUT ANY WARRANTY; without even the implied warranty of ++! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++! GNU Lesser General Public License for more details. ++! ++! You should have received a copy of the GNU Lesser General Public License ++! along with ELPA. If not, see <http://www.gnu.org/licenses/> ++! ++! ELPA reflects a substantial effort on the part of the original ++! ELPA consortium, and we ask you to respect the spirit of the ++! license that we chose: i.e., please contribute any changes you ++! may have back to the original ELPA library distribution, and keep ++! any derivatives of ELPA under the same license that we chose for ++! the original distribution, the GNU Lesser General Public License. ++! ++! ++#include "config-f90.h" ++ ++ ++#ifdef HAVE_64BIT_INTEGER_MATH_SUPPORT ++#define TEST_INT_TYPE integer(kind=c_int64_t) ++#define INT_TYPE c_int64_t ++#else ++#define TEST_INT_TYPE integer(kind=c_int32_t) ++#define INT_TYPE c_int32_t ++#endif ++#ifdef HAVE_64BIT_INTEGER_MPI_SUPPORT ++#define TEST_INT_MPI_TYPE integer(kind=c_int64_t) ++#define INT_MPI_TYPE c_int64_t ++#else ++#define TEST_INT_MPI_TYPE integer(kind=c_int32_t) ++#define INT_MPI_TYPE c_int32_t ++#endif ++#include "../assert.h" ++!> ++!> Fortran test programm to demonstrates the use of ++!> ELPA 2 real case library. ++!> If "HAVE_REDIRECT" was defined at build time ++!> the stdout and stderr output of each MPI task ++!> can be redirected to files if the environment ++!> variable "REDIRECT_ELPA_TEST_OUTPUT" is set ++!> to "true". ++!> ++!> By calling executable [arg1] [arg2] [arg3] [arg4] ++!> one can define the size (arg1), the number of ++!> Eigenvectors to compute (arg2), and the blocking (arg3). ++!> If these values are not set default values (500, 150, 16) ++!> are choosen. ++!> If these values are set the 4th argument can be ++!> "output", which specifies that the EV's are written to ++!> an ascii file. ++!> ++!> The real ELPA 2 kernel is set as the default kernel. ++!> However, this can be overriden by setting ++!> the environment variable "REAL_ELPA_KERNEL" to an ++!> appropiate value. ++!> ++program test_real2_single_banded ++ ++!------------------------------------------------------------------------------- ++! Standard eigenvalue problem - REAL version ++! ++! This program demonstrates the use of the ELPA module ++! together with standard scalapack routines ++! ++! Copyright of the original code rests with the authors inside the ELPA ++! consortium. The copyright of any additional modifications shall rest ++! with their original authors, but shall adhere to the licensing terms ++! distributed along with the original code in the file "COPYING". ++! 
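++! This is the single-precision variant of the banded real test (it uses ++! real(kind=rk4) data); it presumably requires an ELPA build configured ++! with single-precision support -- an assumption about the build, not ++! something this file checks. ++!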
++!------------------------------------------------------------------------------- ++ use elpa ++ ++ !use test_util ++ use test_read_input_parameters ++ use test_check_correctness ++ use test_setup_mpi ++ use test_blacs_infrastructure ++ use test_prepare_matrix ++#ifdef HAVE_REDIRECT ++ use test_redirect ++#endif ++ use test_output_type ++ use tests_scalapack_interfaces ++ implicit none ++ ++ !------------------------------------------------------------------------------- ++ ! Please set system size parameters below! ++ ! na: System size ++ ! nev: Number of eigenvectors to be calculated ++ ! nblk: Blocking factor in block cyclic distribution ++ !------------------------------------------------------------------------------- ++ ++ TEST_INT_TYPE :: nblk ++ TEST_INT_TYPE :: na, nev ++ ++ TEST_INT_TYPE :: np_rows, np_cols, na_rows, na_cols ++ ++ TEST_INT_TYPE :: myid, nprocs, my_prow, my_pcol, mpi_comm_rows, mpi_comm_cols ++ TEST_INT_TYPE :: i, my_blacs_ctxt, sc_desc(9), info, nprow, npcol ++ TEST_INT_MPI_TYPE :: mpierr ++ ++ real(kind=rk4), allocatable :: a(:,:), z(:,:), as(:,:), ev(:) ++ ++ TEST_INT_TYPE :: STATUS ++#ifdef WITH_OPENMP_TRADITIONAL ++ TEST_INT_TYPE :: omp_get_max_threads, required_mpi_thread_level, provided_mpi_thread_level ++#endif ++ integer(kind=c_int) :: error_elpa ++ type(output_t) :: write_to_file ++ character(len=8) :: task_suffix ++ TEST_INT_TYPE :: j ++ TEST_INT_TYPE :: global_row, global_col, local_row, local_col ++ TEST_INT_TYPE :: bandwidth ++ class(elpa_t), pointer :: e ++#define DOUBLE_PRECISION_REAL 1 ++ ++ call read_input_parameters(na, nev, nblk, write_to_file) ++ if (nblk .eq. 1) then ++ stop 77 ++ endif ++ ++ !------------------------------------------------------------------------------- ++ ! MPI Initialization ++ call setup_mpi(myid, nprocs) ++ ++ ++ STATUS = 0 ++ ++#define REALCASE ++ ++ !------------------------------------------------------------------------------- ++ ! Selection of number of processor rows/columns ++ ! We try to set up the grid square-like, i.e. start the search for possible ++ ! divisors of nprocs with a number next to the square root of nprocs ++ ! and decrement it until a divisor is found. ++ ++ do np_cols = NINT(SQRT(REAL(nprocs))),2,-1 ++ if(mod(nprocs,np_cols) == 0 ) exit ++ enddo ++ ! at the end of the above loop, nprocs is always divisible by np_cols ++ ++ np_rows = nprocs/np_cols ++ ++ if(myid==0) then ++ print * ++ print '(a)','Standard eigenvalue problem - REAL version' ++ print * ++ print '(3(a,i0))','Matrix size=',na,', Number of eigenvectors=',nev,', Block size=',nblk ++ print '(3(a,i0))','Number of processor rows=',np_rows,', cols=',np_cols,', total=',nprocs ++ print * ++ endif ++ ++ !------------------------------------------------------------------------------- ++ ! Set up BLACS context and MPI communicators ++ ! ++ ! The BLACS context is only necessary for using Scalapack. ++ ! ++ ! For ELPA, the MPI communicators along rows/cols are sufficient, ++ ! and the grid setup may be done in an arbitrary way as long as it is ++ ! consistent (i.e. 0<=my_prow<np_rows, 0<=my_pcol<np_cols and every ++ ! process has a unique (my_prow,my_pcol) pair). ++ ++ call set_up_blacsgrid(int(mpi_comm_world,kind=BLAS_KIND), np_rows, np_cols, 'C', & ++ my_blacs_ctxt, my_prow, my_pcol) ++ ++ if (myid==0) then ++ print '(a)','| Past BLACS_Gridinfo.' 
++ end if ++ ++ call set_up_blacs_descriptor(na ,nblk, my_prow, my_pcol, np_rows, np_cols, & ++ na_rows, na_cols, sc_desc, my_blacs_ctxt, info) ++ ++ if (myid==0) then ++ print '(a)','| Past scalapack descriptor setup.' ++ end if ++ ++ !------------------------------------------------------------------------------- ++ ! Allocate matrices and set up a test matrix for the eigenvalue problem ++ allocate(a (na_rows,na_cols)) ++ allocate(z (na_rows,na_cols)) ++ allocate(as(na_rows,na_cols)) ++ ++ allocate(ev(na)) ++ ++ call prepare_matrix_random(na, myid, sc_desc, a, z, as) ++ ++ ! set values outside of the bandwidth to zero ++ bandwidth = nblk ++ ++ do local_row = 1, na_rows ++ global_row = index_l2g( local_row, nblk, my_prow, np_rows ) ++ do local_col = 1, na_cols ++ global_col = index_l2g( local_col, nblk, my_pcol, np_cols ) ++ ++ if (ABS(global_row-global_col) > bandwidth) then ++ a(local_row, local_col) = 0.0 ++ as(local_row, local_col) = 0.0 ++ end if ++ end do ++ end do ++ ++ if (elpa_init(CURRENT_API_VERSION) /= ELPA_OK) then ++ print *, "ELPA API version not supported" ++ stop 1 ++ endif ++ e => elpa_allocate(error_elpa) ++ assert_elpa_ok(error_elpa) ++ ++ call e%set("na", int(na,kind=c_int), error_elpa) ++ assert_elpa_ok(error_elpa) ++ call e%set("nev", int(nev,kind=c_int), error_elpa) ++ assert_elpa_ok(error_elpa) ++ call e%set("local_nrows", int(na_rows,kind=c_int), error_elpa) ++ assert_elpa_ok(error_elpa) ++ call e%set("local_ncols", int(na_cols,kind=c_int), error_elpa) ++ assert_elpa_ok(error_elpa) ++ call e%set("nblk", int(nblk,kind=c_int), error_elpa) ++ assert_elpa_ok(error_elpa) ++#ifdef WITH_MPI ++ call e%set("mpi_comm_parent", int(MPI_COMM_WORLD,kind=c_int), error_elpa) ++ assert_elpa_ok(error_elpa) ++ call e%set("process_row", int(my_prow,kind=c_int), error_elpa) ++ assert_elpa_ok(error_elpa) ++ call e%set("process_col", int(my_pcol,kind=c_int), error_elpa) ++ assert_elpa_ok(error_elpa) ++#endif ++ ++ call e%set("bandwidth", int(bandwidth,kind=c_int), error_elpa) ++ assert_elpa_ok(error_elpa) ++ ++ assert(e%setup() .eq. ELPA_OK) ++ ++ call e%set("solver", ELPA_SOLVER_2STAGE, error_elpa) ++ assert_elpa_ok(error_elpa) ++ ++ call e%eigenvectors(a, ev, z, error_elpa) ++ assert_elpa_ok(error_elpa) ++ ++ call elpa_deallocate(e, error_elpa) ++ assert_elpa_ok(error_elpa) ++ ++ call elpa_uninit(error_elpa) ++ assert_elpa_ok(error_elpa) ++ ++ ++ !------------------------------------------------------------------------------- ++ ! Test correctness of result (using plain scalapack routines) ++ ++ status = check_correctness_evp_numeric_residuals(na, nev, as, z, ev, sc_desc, nblk, myid, np_rows, np_cols, my_prow, my_pcol) ++ deallocate(a) ++ deallocate(as) ++ ++ deallocate(z) ++ deallocate(ev) ++ ++#ifdef WITH_MPI ++ call blacs_gridexit(my_blacs_ctxt) ++ call mpi_finalize(mpierr) ++#endif ++ call EXIT(STATUS) ++end ++ ++!------------------------------------------------------------------------------- +diff -ruN elpa-new_release_2021.11.001/examples/Fortran/elpa_generalized/test_bindings.F90 elpa-new_release_2021.11.001_ok/examples/Fortran/elpa_generalized/test_bindings.F90 +--- elpa-new_release_2021.11.001/examples/Fortran/elpa_generalized/test_bindings.F90 1970-01-01 01:00:00.000000000 +0100 ++++ elpa-new_release_2021.11.001_ok/examples/Fortran/elpa_generalized/test_bindings.F90 2022-01-26 10:09:56.136747000 +0100 +@@ -0,0 +1,168 @@ ++#include "config-f90.h" ++ ++#include "../assert.h" ++ ++program test_bindings ++ use elpa ++ ++ use test_util ++ use test_setup_mpi ++! 
use test_prepare_matrix ++ use test_read_input_parameters ++ use test_blacs_infrastructure ++! use test_check_correctness ++! use test_analytic ++! use test_scalapack ++ ++ ++ implicit none ++ ++#include "src/elpa_generated_fortran_interfaces.h" ++ ++ ! matrix dimensions ++ integer :: na, nev, nblk ++ ++ ! mpi ++ integer :: myid, nprocs ++ integer :: na_cols, na_rows ! local matrix size ++ integer :: np_cols, np_rows ! number of MPI processes per column/row ++ integer :: my_prow, my_pcol ! local MPI task position (my_prow, my_pcol) in the grid (0..np_cols -1, 0..np_rows -1) ++ integer :: mpierr, mpi_comm_rows, mpi_comm_cols ++ type(output_t) :: write_to_file ++ ++ ! blacs ++ integer :: my_blacs_ctxt, sc_desc(9), info, nprow, npcol, i, j ++ character(len=1) :: layout ++ ++ ++ ! The Matrix ++ real(kind=C_DOUBLE) , allocatable :: a(:,:), res(:,:) ++ ++ logical :: skip_check_correctness ++ ++ class(elpa_t), pointer :: e ++ ++ integer :: error, status ++ ++ call read_input_parameters_traditional(na, nev, nblk, write_to_file, skip_check_correctness) ++ call setup_mpi(myid, nprocs) ++#ifdef WITH_MPI ++ call MPI_BARRIER(MPI_COMM_WORLD, mpierr) ++ !call redirect_stdout(myid) ++#endif ++ ++ if (elpa_init(CURRENT_API_VERSION) /= ELPA_OK) then ++ print *, "ELPA API version not supported" ++ stop 1 ++ endif ++ ++ layout = 'C' ++ do np_cols = NINT(SQRT(REAL(nprocs))),2,-1 ++ if(mod(nprocs,np_cols) == 0 ) exit ++ enddo ++ ++ np_rows = nprocs/np_cols ++ assert(nprocs == np_rows * np_cols) ++ ++ if (myid == 0) then ++ print '((a,i0))', 'Matrix size: ', na ++ print '((a,i0))', 'Num eigenvectors: ', nev ++ print '((a,i0))', 'Blocksize: ', nblk ++#ifdef WITH_MPI ++ print '((a,i0))', 'Num MPI proc: ', nprocs ++ print '(3(a,i0))','Number of processor rows=',np_rows,', cols=',np_cols,', total=',nprocs ++ print '(a)', 'Process layout: ' // layout ++#endif ++ print *,'' ++ endif ++ ++ ++ call set_up_blacsgrid(mpi_comm_world, np_rows, np_cols, layout, & ++ my_blacs_ctxt, my_prow, my_pcol) ++ ++ call set_up_blacs_descriptor(na, nblk, my_prow, my_pcol, np_rows, np_cols, & ++ na_rows, na_cols, sc_desc, my_blacs_ctxt, info) ++ ++ allocate(a (na_rows,na_cols)) ++ allocate(res(na_rows,na_cols)) ++ ++ e => elpa_allocate(error) ++ assert_elpa_ok(error) ++ ++ call e%set("na", na, error) ++ assert_elpa_ok(error) ++ call e%set("nev", nev, error) ++ assert_elpa_ok(error) ++ call e%set("local_nrows", na_rows, error) ++ assert_elpa_ok(error) ++ call e%set("local_ncols", na_cols, error) ++ assert_elpa_ok(error) ++ call e%set("nblk", nblk, error) ++ assert_elpa_ok(error) ++ ++#ifdef WITH_MPI ++ call e%set("mpi_comm_parent", MPI_COMM_WORLD, error) ++ assert_elpa_ok(error) ++ call e%set("process_row", my_prow, error) ++ assert_elpa_ok(error) ++ call e%set("process_col", my_pcol, error) ++ assert_elpa_ok(error) ++#endif ++ ++ call e%get("mpi_comm_rows",mpi_comm_rows, error) ++ assert_elpa_ok(error) ++ call e%get("mpi_comm_cols",mpi_comm_cols, error) ++ assert_elpa_ok(error) ++ ++ a(:,:) = 1.0 ++ res(:,:) = 0.0 ++ ++ call test_c_bindings(a, na_rows, na_cols, np_rows, np_cols, my_prow, my_pcol, sc_desc, res, mpi_comm_rows, mpi_comm_cols) ++ ++ status = 0 ++ do i = 1, na_rows ++ do j = 1, na_cols ++ if(a(i,j) .ne. 1.0) then ++ write(*,*) i, j, ": wrong value of A: ", a(i,j), ", should be 1" ++ status = 1 ++ endif ++ if(res(i,j) .ne. 
3.0) then ++ write(*,*) i, j, ": wrong value of res: ", res(i,j), ", should be 3" ++ status = 1 ++ endif ++ enddo ++ enddo ++ ++ call check_status(status, myid) ++ ++ call elpa_deallocate(e, error) ++ assert_elpa_ok(error) ++ ++ deallocate(a) ++ deallocate(res) ++ call elpa_uninit(error) ++ assert_elpa_ok(error) ++ ++#ifdef WITH_MPI ++ call blacs_gridexit(my_blacs_ctxt) ++ call mpi_finalize(mpierr) ++#endif ++ ++ call exit(status) ++ ++ contains ++ ++ subroutine check_status(status, myid) ++ implicit none ++ integer, intent(in) :: status, myid ++ integer :: mpierr ++ if (status /= 0) then ++ if (myid == 0) print *, "Result incorrect!" ++#ifdef WITH_MPI ++ call mpi_finalize(mpierr) ++#endif ++ call exit(status) ++ endif ++ end subroutine ++ ++end program +diff -ruN elpa-new_release_2021.11.001/examples/Fortran/elpa_print_headers.F90 elpa-new_release_2021.11.001_ok/examples/Fortran/elpa_print_headers.F90 +--- elpa-new_release_2021.11.001/examples/Fortran/elpa_print_headers.F90 1970-01-01 01:00:00.000000000 +0100 ++++ elpa-new_release_2021.11.001_ok/examples/Fortran/elpa_print_headers.F90 2022-01-26 10:04:52.441241000 +0100 +@@ -0,0 +1,282 @@ ++#if 0 ++! This file is part of ELPA. ++! ++! The ELPA library was originally created by the ELPA consortium, ++! consisting of the following organizations: ++! ++! - Max Planck Computing and Data Facility (MPCDF), formerly known as ++! Rechenzentrum Garching der Max-Planck-Gesellschaft (RZG), ++! - Bergische Universität Wuppertal, Lehrstuhl für angewandte ++! Informatik, ++! - Technische Universität München, Lehrstuhl für Informatik mit ++! Schwerpunkt Wissenschaftliches Rechnen , ++! - Fritz-Haber-Institut, Berlin, Abt. Theorie, ++! - Max-Plack-Institut für Mathematik in den Naturwissenschaften, ++! Leipzig, Abt. Komplexe Strukutren in Biologie und Kognition, ++! and ++! - IBM Deutschland GmbH ++! ++! ++! More information can be found here: ++! http://elpa.mpcdf.mpg.de/ ++! ++! ELPA is free software: you can redistribute it and/or modify ++! it under the terms of the version 3 of the license of the ++! GNU Lesser General Public License as published by the Free ++! Software Foundation. ++! ++! ELPA is distributed in the hope that it will be useful, ++! but WITHOUT ANY WARRANTY; without even the implied warranty of ++! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++! GNU Lesser General Public License for more details. ++! ++! You should have received a copy of the GNU Lesser General Public License ++! along with ELPA. If not, see <http://www.gnu.org/licenses/> ++! ++! ELPA reflects a substantial effort on the part of the original ++! ELPA consortium, and we ask you to respect the spirit of the ++! license that we chose: i.e., please contribute any changes you ++! may have back to the original ELPA library distribution, and keep ++! any derivatives of ELPA under the same license that we chose for ++! the original distribution, the GNU Lesser General Public License. ++! ++! ++! ELPA1 -- Faster replacements for ScaLAPACK symmetric eigenvalue routines ++! ++! Copyright of the original code rests with the authors inside the ELPA ++! consortium. The copyright of any additional modifications shall rest ++! with their original authors, but shall adhere to the licensing terms ++! distributed along with the original code in the file "COPYING". ++#endif ++ ++#ifdef WITH_OPENMP_TRADITIONAL ++ if (myid .eq. 
0) then
++    print *,"Threaded version of test program"
++    print *,"Using ",omp_get_max_threads()," threads"
++    print *," "
++  endif
++#endif
++
++#ifndef WITH_MPI
++  if (myid .eq. 0) then
++    print *,"This version of ELPA does not support MPI parallelisation"
++    print *,"For MPI support re-build ELPA with appropriate flags"
++    print *," "
++  endif
++#endif
++
++#ifdef ELPA1
++
++#ifdef REALCASE
++#ifdef DOUBLE_PRECISION_REAL
++  if (myid .eq. 0) then
++    print *," "
++    print *,"Real valued double-precision version of ELPA1 is used"
++    print *," "
++  endif
++#else
++  if (myid .eq. 0) then
++    print *," "
++    print *,"Real valued single-precision version of ELPA1 is used"
++    print *," "
++  endif
++#endif
++
++#endif
++
++#ifdef COMPLEXCASE
++#ifdef DOUBLE_PRECISION_COMPLEX
++  if (myid .eq. 0) then
++    print *," "
++    print *,"Complex valued double-precision version of ELPA1 is used"
++    print *," "
++  endif
++#else
++  if (myid .eq. 0) then
++    print *," "
++    print *,"Complex valued single-precision version of ELPA1 is used"
++    print *," "
++  endif
++#endif
++
++#endif /* DATATYPE */
++
++#else /* ELPA1 */
++
++#ifdef REALCASE
++#ifdef DOUBLE_PRECISION_REAL
++  if (myid .eq. 0) then
++    print *," "
++    print *,"Real valued double-precision version of ELPA2 is used"
++    print *," "
++  endif
++#else
++  if (myid .eq. 0) then
++    print *," "
++    print *,"Real valued single-precision version of ELPA2 is used"
++    print *," "
++  endif
++#endif
++
++#endif
++
++#ifdef COMPLEXCASE
++#ifdef DOUBLE_PRECISION_COMPLEX
++  if (myid .eq. 0) then
++    print *," "
++    print *,"Complex valued double-precision version of ELPA2 is used"
++    print *," "
++  endif
++#else
++  if (myid .eq. 0) then
++    print *," "
++    print *,"Complex valued single-precision version of ELPA2 is used"
++    print *," "
++  endif
++#endif
++
++#endif /* DATATYPE */
++
++#endif /* ELPA1 */
++
++#ifdef WITH_MPI
++  call MPI_BARRIER(MPI_COMM_WORLD, mpierr)
++#endif
++#ifdef HAVE_REDIRECT
++  if (check_redirect_environment_variable()) then
++    if (myid .eq. 0) then
++      print *," "
++      print *,"Redirection of mpi processes is used"
++      print *," "
++      if (create_directories() .ne. 1) then
++        write(error_unit,*) "Unable to create directory for stdout and stderr!"
++        stop 1
++      endif
++    endif
++#ifdef WITH_MPI
++    call MPI_BARRIER(MPI_COMM_WORLD, mpierr)
++#endif
++    call redirect_stdout(myid)
++  endif
++#endif
++
++#ifndef ELPA1
++
++  if (myid .eq. 0) then
++    print *," "
++    print *,"This ELPA2 was built with"
++#ifdef WITH_NVIDIA_GPU_KERNEL
++    print *,"CUDA GPU support"
++#endif
++#ifdef WITH_NVIDIA_SM80_GPU_KERNEL
++    print *,"CUDA sm_80 GPU support"
++#endif
++#ifdef WITH_INTEL_GPU_KERNEL
++    print *,"INTEL GPU support"
++#endif
++#ifdef WITH_AMD_GPU_KERNEL
++    print *,"AMD GPU support"
++#endif
++    print *," "
++#ifdef REALCASE
++
++#ifdef HAVE_AVX2
++
++#ifdef WITH_REAL_AVX_BLOCK2_KERNEL
++    print *,"AVX2 optimized kernel (2 blocking) for real matrices"
++#endif
++#ifdef WITH_REAL_AVX_BLOCK4_KERNEL
++    print *,"AVX2 optimized kernel (4 blocking) for real matrices"
++#endif
++#ifdef WITH_REAL_AVX_BLOCK6_KERNEL
++    print *,"AVX2 optimized kernel (6 blocking) for real matrices"
++#endif
++
++#else /* no HAVE_AVX2 */
++
++#ifdef HAVE_AVX
++
++#ifdef WITH_REAL_AVX_BLOCK2_KERNEL
++    print *,"AVX optimized kernel (2 blocking) for real matrices"
++#endif
++#ifdef WITH_REAL_AVX_BLOCK4_KERNEL
++    print *,"AVX optimized kernel (4 blocking) for real matrices"
++#endif
++#ifdef WITH_REAL_AVX_BLOCK6_KERNEL
++    print *,"AVX optimized kernel (6 blocking) for real matrices"
++#endif
++
++#endif
++
++#endif /* HAVE_AVX2 */
++
++
++#ifdef WITH_REAL_GENERIC_KERNEL
++    print *,"GENERIC kernel for real matrices"
++#endif
++#ifdef WITH_REAL_GENERIC_SIMPLE_KERNEL
++    print *,"GENERIC SIMPLE kernel for real matrices"
++#endif
++#ifdef WITH_REAL_SSE_ASSEMBLY_KERNEL
++    print *,"SSE ASSEMBLER kernel for real matrices"
++#endif
++#ifdef WITH_REAL_BGP_KERNEL
++    print *,"BGP kernel for real matrices"
++#endif
++#ifdef WITH_REAL_BGQ_KERNEL
++    print *,"BGQ kernel for real matrices"
++#endif
++
++#endif /* DATATYPE == REAL */
++
++#ifdef COMPLEXCASE
++
++#ifdef HAVE_AVX2
++
++#ifdef WITH_COMPLEX_AVX_BLOCK2_KERNEL
++    print *,"AVX2 optimized kernel (2 blocking) for complex matrices"
++#endif
++#ifdef WITH_COMPLEX_AVX_BLOCK1_KERNEL
++    print *,"AVX2 optimized kernel (1 blocking) for complex matrices"
++#endif
++
++#else /* no HAVE_AVX2 */
++
++#ifdef HAVE_AVX
++
++#ifdef WITH_COMPLEX_AVX_BLOCK2_KERNEL
++    print *,"AVX optimized kernel (2 blocking) for complex matrices"
++#endif
++#ifdef WITH_COMPLEX_AVX_BLOCK1_KERNEL
++    print *,"AVX optimized kernel (1 blocking) for complex matrices"
++#endif
++
++#endif
++
++#endif /* HAVE_AVX2 */
++
++
++#ifdef WITH_COMPLEX_GENERIC_KERNEL
++    print *,"GENERIC kernel for complex matrices"
++#endif
++#ifdef WITH_COMPLEX_GENERIC_SIMPLE_KERNEL
++    print *,"GENERIC SIMPLE kernel for complex matrices"
++#endif
++#ifdef WITH_COMPLEX_SSE_ASSEMBLY_KERNEL
++    print *,"SSE ASSEMBLER kernel for complex matrices"
++#endif
++
++#endif /* DATATYPE == COMPLEX */
++
++  endif
++#endif /* ELPA1 */
++
++  if (write_to_file%eigenvectors) then
++    if (myid .eq. 0) print *,"Writing Eigenvectors to files"
++  endif
++
++  if (write_to_file%eigenvalues) then
++    if (myid .eq. 
0) print *,"Writing Eigenvalues to files" ++ endif ++ ++ +diff -ruN elpa-new_release_2021.11.001/examples/Fortran/Makefile_examples_hybrid elpa-new_release_2021.11.001_ok/examples/Fortran/Makefile_examples_hybrid +--- elpa-new_release_2021.11.001/examples/Fortran/Makefile_examples_hybrid 1970-01-01 01:00:00.000000000 +0100 ++++ elpa-new_release_2021.11.001_ok/examples/Fortran/Makefile_examples_hybrid 2022-01-28 09:55:45.556223000 +0100 +@@ -0,0 +1,38 @@ ++# MPICH, that is IntelMPI or ParaStationMPI ++SCALAPACK_LIB = -lmkl_scalapack_lp64 -lmkl_blacs_intelmpi_lp64 ++# OpenMPI ++# SCALAPACK_LIB = -lmkl_scalapack_lp64 $(MKLROOT)/lib/intel64/libmkl_blacs_openmpi_lp64.a ++LAPACK_LIB = ++# Intel compiler ++MKL = -lmkl_intel_lp64 -lmkl_intel_thread -lmkl_core -liomp5 -lpthread -lstdc++ ++# GCC ++# MKL = -lmkl_gf_lp64 -lmkl_gnu_thread -lmkl_core -lgomp -lpthread -lstdc++ -lm ++F90 = mpif90 -O3 -qopenmp -I$(ELPA_MODULES_OPENMP) -I$(ELPA_MODULES) -I$(ELPA_INCLUDE_OPENMP) -I$(ELPA_INCLUDE_OPENMP)/elpa ++# GCC ++# F90 = mpif90 -O3 -fopenmp -I$(ELPA_MODULES_OPENMP) -I$(ELPA_MODULES) -I$(ELPA_INCLUDE_OPENMP) -I$(ELPA_INCLUDE_OPENMP)/elpa ++LIBS = -L$(ELPA_LIB) -lelpatest_openmp -lelpa_openmp $(SCALAPACK_LIB) $(MKL) ++# CC = mpicc -qopenmp -O3 ++# GCC ++# CC = mpicc -fopenmp -O3 ++ ++all: test_real_1stage_omp test_real_2stage_all_kernels_omp test_autotune_omp test_multiple_objs_omp test_split_comm_omp test_skewsymmetric_omp ++ ++test_real_1stage_omp: test.F90 ++ /usr/bin/cpp -P -DTEST_GPU=0 -DTEST_REAL -DTEST_DOUBLE -DTEST_SOLVER_1STAGE -DTEST_EIGENVECTORS -DWITH_OPENMP_TRADITIONAL -DCURRENT_API_VERSION=20211125 -DWITH_MPI -I$(ELPA_INCLUDE_OPENMP)/elpa -o test_real_1stage_omp.F90 test.F90 ++ $(F90) -o $@ test_real_1stage_omp.F90 $(LIBS) ++ ++test_real_2stage_all_kernels_omp: test.F90 ++ /usr/bin/cpp -P -DTEST_GPU=0 -DTEST_REAL -DTEST_DOUBLE -DTEST_SOLVER_2STAGE -DTEST_EIGENVECTORS -DTEST_ALL_KERNELS -DWITH_OPENMP_TRADITIONAL -DCURRENT_API_VERSION=20211125 -DWITH_MPI -I$(ELPA_INCLUDE_OPENMP)/elpa -o test_real_2stage_all_kernels_omp.F90 test.F90 ++ $(F90) -o $@ test_real_2stage_all_kernels_omp.F90 $(LIBS) ++ ++test_autotune_omp: test_autotune.F90 ++ $(F90) -DTEST_REAL -DTEST_DOUBLE -DWITH_MPI -DWITH_OPENMP_TRADITIONAL -DCURRENT_API_VERSION=20211125 -I$(ELPA_INCLUDE_OPENMP)/elpa -o $@ test_autotune.F90 $(LIBS) ++ ++test_multiple_objs_omp: test_multiple_objs.F90 ++ $(F90) -DTEST_REAL -DTEST_DOUBLE -DWITH_MPI -DWITH_OPENMP_TRADITIONAL -DCURRENT_API_VERSION=20211125 -I$(ELPA_INCLUDE_OPENMP)/elpa -o $@ test_multiple_objs.F90 $(LIBS) ++ ++test_split_comm_omp: test_split_comm.F90 ++ $(F90) -DTEST_REAL -DTEST_DOUBLE -DWITH_MPI -DWITH_OPENMP_TRADITIONAL -DCURRENT_API_VERSION=20211125 -I$(ELPA_INCLUDE_OPENMP)/elpa -o $@ test_split_comm.F90 $(LIBS) ++ ++test_skewsymmetric_omp: test_skewsymmetric.F90 ++ $(F90) -DTEST_REAL -DTEST_DOUBLE -DWITH_MPI -DWITH_OPENMP_TRADITIONAL -DCURRENT_API_VERSION=20211125 -I$(ELPA_INCLUDE_OPENMP)/elpa -o $@ test_skewsymmetric.F90 $(LIBS) +diff -ruN elpa-new_release_2021.11.001/examples/Fortran/Makefile_examples_pure elpa-new_release_2021.11.001_ok/examples/Fortran/Makefile_examples_pure +--- elpa-new_release_2021.11.001/examples/Fortran/Makefile_examples_pure 1970-01-01 01:00:00.000000000 +0100 ++++ elpa-new_release_2021.11.001_ok/examples/Fortran/Makefile_examples_pure 2022-01-28 09:54:41.176236000 +0100 +@@ -0,0 +1,34 @@ ++# MPICH, that is IntelMPI or ParaStationMPI ++SCALAPACK_LIB = -lmkl_scalapack_lp64 -lmkl_blacs_intelmpi_lp64 ++# OpenMPI ++# SCALAPACK_LIB = -lmkl_scalapack_lp64 
$(MKLROOT)/lib/intel64/libmkl_blacs_openmpi_lp64.a ++LAPACK_LIB = ++# Intel compiler ++MKL = -lmkl_intel_lp64 -lmkl_sequential -lmkl_core -liomp5 -lpthread -lstdc++ ++# GCC ++# MKL = -lmkl_gf_lp64 -lmkl_sequential -lmkl_core -lgomp -lpthread -lstdc++ -lm ++F90 = mpif90 -O3 -I$(ELPA_MODULES) -I$(ELPA_INCLUDE) -I$(ELPA_INCLUDE)/elpa ++LIBS = -L$(ELPA_LIB) -lelpa -lelpatest $(SCALAPACK_LIB) $(MKL) ++# CC = mpicc -O3 ++ ++all: test_real_1stage test_real_2stage_all_kernels test_autotune test_multiple_objs test_split_comm test_skewsymmetric ++ ++test_real_1stage: test.F90 ++ /usr/bin/cpp -P -DTEST_GPU=0 -DTEST_REAL -DTEST_DOUBLE -DTEST_SOLVER_1STAGE -DTEST_EIGENVECTORS -DWITH_MPI -DCURRENT_API_VERSION=20211125 -I$(ELPA_INCLUDE)/elpa -o test_real_1stage.F90 test.F90 ++ $(F90) -o $@ test_real_1stage.F90 $(LIBS) ++ ++test_real_2stage_all_kernels: test.F90 ++ /usr/bin/cpp -P -DTEST_GPU=0 -DTEST_REAL -DTEST_DOUBLE -DTEST_SOLVER_2STAGE -DTEST_EIGENVECTORS -DTEST_ALL_KERNELS -DWITH_MPI -DCURRENT_API_VERSION=20211125 -I$(ELPA_INCLUDE)/elpa -o test_real_2stage_all_kernels.F90 test.F90 ++ $(F90) -o $@ test_real_2stage_all_kernels.F90 $(LIBS) ++ ++test_autotune: test_autotune.F90 ++ $(F90) -DTEST_REAL -DTEST_DOUBLE -DWITH_MPI -DCURRENT_API_VERSION=20211125 -I$(ELPA_INCLUDE)/elpa -o $@ test_autotune.F90 $(LIBS) ++ ++test_multiple_objs: test_multiple_objs.F90 ++ $(F90) -DTEST_REAL -DTEST_DOUBLE -DWITH_MPI -DCURRENT_API_VERSION=20211125 -I$(ELPA_INCLUDE)/elpa -o $@ test_multiple_objs.F90 $(LIBS) ++ ++test_split_comm: test_split_comm.F90 ++ $(F90) -DTEST_REAL -DTEST_DOUBLE -DWITH_MPI -DCURRENT_API_VERSION=20211125 -I$(ELPA_INCLUDE)/elpa -o $@ test_split_comm.F90 $(LIBS) ++ ++test_skewsymmetric: test_skewsymmetric.F90 ++ $(F90) -DTEST_REAL -DTEST_DOUBLE -DWITH_MPI -DCURRENT_API_VERSION=20211125 -I$(ELPA_INCLUDE)/elpa -o $@ test_skewsymmetric.F90 $(LIBS) +diff -ruN elpa-new_release_2021.11.001/examples/Fortran/Makefile_examples_pure_cuda elpa-new_release_2021.11.001_ok/examples/Fortran/Makefile_examples_pure_cuda +--- elpa-new_release_2021.11.001/examples/Fortran/Makefile_examples_pure_cuda 1970-01-01 01:00:00.000000000 +0100 ++++ elpa-new_release_2021.11.001_ok/examples/Fortran/Makefile_examples_pure_cuda 2022-01-28 09:54:52.690358000 +0100 +@@ -0,0 +1,34 @@ ++# MPICH, that is IntelMPI or ParaStationMPI ++SCALAPACK_LIB = -lmkl_scalapack_lp64 -lmkl_blacs_intelmpi_lp64 ++# OpenMPI ++# SCALAPACK_LIB = -lmkl_scalapack_lp64 $(MKLROOT)/lib/intel64/libmkl_blacs_openmpi_lp64.a ++LAPACK_LIB = ++# Intel compiler ++MKL = -lmkl_intel_lp64 -lmkl_sequential -lmkl_core -liomp5 -lpthread -lstdc++ ++# GCC ++# MKL = -lmkl_gf_lp64 -lmkl_sequential -lmkl_core -lgomp -lpthread -lstdc++ -lm ++F90 = mpif90 -O3 -I$(ELPA_MODULES) -I$(ELPA_INCLUDE) -I$(ELPA_INCLUDE)/elpa ++LIBS = -L$(ELPA_LIB) -lelpa -lelpatest $(SCALAPACK_LIB) $(MKL) -lcudart ++# CC = mpicc -O3 ++ ++all: test_real_1stage test_real_2stage_all_kernels test_autotune test_multiple_objs test_split_comm test_skewsymmetric ++ ++test_real_1stage: test.F90 ++ /usr/bin/cpp -P -DTEST_NVIDIA_GPU=1 -DTEST_REAL -DTEST_DOUBLE -DTEST_SOLVER_1STAGE -DTEST_EIGENVECTORS -DWITH_MPI -DCURRENT_API_VERSION=20211125 -I$(ELPA_INCLUDE)/elpa -o test_real_1stage.F90 test.F90 ++ $(F90) -o $@ test_real_1stage.F90 $(LIBS) ++ ++test_real_2stage_all_kernels: test.F90 ++ /usr/bin/cpp -P -DTEST_NVIDIA_GPU=1 -DTEST_REAL -DTEST_DOUBLE -DTEST_SOLVER_2STAGE -DTEST_EIGENVECTORS -DTEST_ALL_KERNELS -DWITH_MPI -DCURRENT_API_VERSION=20211125 -I$(ELPA_INCLUDE)/elpa -o test_real_2stage_all_kernels.F90 test.F90 ++ 
$(F90) -o $@ test_real_2stage_all_kernels.F90 $(LIBS) ++ ++test_autotune: test_autotune.F90 ++ $(F90) -DTEST_REAL -DTEST_NVIDIA_GPU=1 -DTEST_DOUBLE -DWITH_MPI -DCURRENT_API_VERSION=20211125 -I$(ELPA_INCLUDE)/elpa -o $@ test_autotune.F90 $(LIBS) ++ ++test_multiple_objs: test_multiple_objs.F90 ++ $(F90) -DTEST_REAL -DTEST_NVIDIA_GPU=1 -DTEST_DOUBLE -DWITH_MPI -DCURRENT_API_VERSION=20211125 -I$(ELPA_INCLUDE)/elpa -o $@ test_multiple_objs.F90 $(LIBS) ++ ++test_split_comm: test_split_comm.F90 ++ $(F90) -DTEST_NVIDIA_GPU=1 -DTEST_REAL -DTEST_DOUBLE -DWITH_MPI -DCURRENT_API_VERSION=20211125 -I$(ELPA_INCLUDE)/elpa -o $@ test_split_comm.F90 $(LIBS) ++ ++test_skewsymmetric: test_skewsymmetric.F90 ++ $(F90) -DTEST_REAL -DTEST_NVIDIA_GPU=1 -DTEST_DOUBLE -DWITH_MPI -DCURRENT_API_VERSION=20211125 -I$(ELPA_INCLUDE)/elpa -o $@ test_skewsymmetric.F90 $(LIBS) +diff -ruN elpa-new_release_2021.11.001/examples/Fortran/test_autotune.F90 elpa-new_release_2021.11.001_ok/examples/Fortran/test_autotune.F90 +--- elpa-new_release_2021.11.001/examples/Fortran/test_autotune.F90 1970-01-01 01:00:00.000000000 +0100 ++++ elpa-new_release_2021.11.001_ok/examples/Fortran/test_autotune.F90 2022-01-28 18:31:05.305617544 +0100 +@@ -0,0 +1,345 @@ ++! This file is part of ELPA. ++! ++! The ELPA library was originally created by the ELPA consortium, ++! consisting of the following organizations: ++! ++! - Max Planck Computing and Data Facility (MPCDF), formerly known as ++! Rechenzentrum Garching der Max-Planck-Gesellschaft (RZG), ++! - Bergische Universität Wuppertal, Lehrstuhl für angewandte ++! Informatik, ++! - Technische Universität München, Lehrstuhl für Informatik mit ++! Schwerpunkt Wissenschaftliches Rechnen , ++! - Fritz-Haber-Institut, Berlin, Abt. Theorie, ++! - Max-Plack-Institut für Mathematik in den Naturwissenschaften, ++! Leipzig, Abt. Komplexe Strukutren in Biologie und Kognition, ++! and ++! - IBM Deutschland GmbH ++! ++! ++! More information can be found here: ++! http://elpa.mpcdf.mpg.de/ ++! ++! ELPA is free software: you can redistribute it and/or modify ++! it under the terms of the version 3 of the license of the ++! GNU Lesser General Public License as published by the Free ++! Software Foundation. ++! ++! ELPA is distributed in the hope that it will be useful, ++! but WITHOUT ANY WARRANTY; without even the implied warranty of ++! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++! GNU Lesser General Public License for more details. ++! ++! You should have received a copy of the GNU Lesser General Public License ++! along with ELPA. If not, see <http://www.gnu.org/licenses/> ++! ++! ELPA reflects a substantial effort on the part of the original ++! ELPA consortium, and we ask you to respect the spirit of the ++! license that we chose: i.e., please contribute any changes you ++! may have back to the original ELPA library distribution, and keep ++! any derivatives of ELPA under the same license that we chose for ++! the original distribution, the GNU Lesser General Public License. ++! ++! ++#include "config-f90.h" ++ ++! Define one of TEST_REAL or TEST_COMPLEX ++! Define one of TEST_SINGLE or TEST_DOUBLE ++! Define one of TEST_SOLVER_1STAGE or TEST_SOLVER_2STAGE ++! Define TEST_NVIDIA_GPU \in [0, 1] ++! Define TEST_INTEL_GPU \in [0, 1] ++! Define TEST_AMD_GPU \in [0, 1] ++! 
Define either TEST_ALL_KERNELS or a TEST_KERNEL \in [any valid kernel] ++ ++#if !(defined(TEST_REAL) ^ defined(TEST_COMPLEX)) ++error: define exactly one of TEST_REAL or TEST_COMPLEX ++#endif ++ ++#if !(defined(TEST_SINGLE) ^ defined(TEST_DOUBLE)) ++error: define exactly one of TEST_SINGLE or TEST_DOUBLE ++#endif ++ ++#ifdef TEST_SINGLE ++# define EV_TYPE real(kind=C_FLOAT) ++# ifdef TEST_REAL ++# define MATRIX_TYPE real(kind=C_FLOAT) ++# else ++# define MATRIX_TYPE complex(kind=C_FLOAT_COMPLEX) ++# endif ++#else ++# define EV_TYPE real(kind=C_DOUBLE) ++# ifdef TEST_REAL ++# define MATRIX_TYPE real(kind=C_DOUBLE) ++# else ++# define MATRIX_TYPE complex(kind=C_DOUBLE_COMPLEX) ++# endif ++#endif ++ ++ ++#ifdef TEST_REAL ++# define AUTOTUNE_DOMAIN ELPA_AUTOTUNE_DOMAIN_REAL ++#else ++# define AUTOTUNE_DOMAIN ELPA_AUTOTUNE_DOMAIN_COMPLEX ++#endif ++ ++#ifdef HAVE_64BIT_INTEGER_MATH_SUPPORT ++#define TEST_INT_TYPE integer(kind=c_int64_t) ++#define INT_TYPE c_int64_t ++#else ++#define TEST_INT_TYPE integer(kind=c_int32_t) ++#define INT_TYPE c_int32_t ++#endif ++ ++#ifdef HAVE_64BIT_INTEGER_MPI_SUPPORT ++#define TEST_INT_MPI_TYPE integer(kind=c_int64_t) ++#define INT_MPI_TYPE c_int64_t ++#else ++#define TEST_INT_MPI_TYPE integer(kind=c_int32_t) ++#define INT_MPI_TYPE c_int32_t ++#endif ++ ++ ++ ++#define TEST_GPU 0 ++#if (TEST_NVIDIA_GPU == 1) || (TEST_AMD_GPU == 1) ++#undef TEST_GPU ++#define TEST_GPU 1 ++#endif ++ ++ ++#include "assert.h" ++ ++program test ++ use elpa ++ ++ !use test_util ++ use test_setup_mpi ++ use test_prepare_matrix ++ use test_read_input_parameters ++ use test_blacs_infrastructure ++ use test_check_correctness ++ use test_analytic ++ use iso_fortran_env ++ ++#ifdef HAVE_REDIRECT ++ use test_redirect ++#endif ++ implicit none ++ ++ ! matrix dimensions ++ TEST_INT_TYPE :: na, nev, nblk ++ ++ ! mpi ++ TEST_INT_TYPE :: myid, nprocs ++ TEST_INT_TYPE :: na_cols, na_rows ! local matrix size ++ TEST_INT_TYPE :: np_cols, np_rows ! number of MPI processes per column/row ++ TEST_INT_TYPE :: my_prow, my_pcol ! local MPI task position (my_prow, my_pcol) in the grid (0..np_cols -1, 0..np_rows -1) ++ TEST_INT_MPI_TYPE :: mpierr ++ ++ ! blacs ++ character(len=1) :: layout ++ TEST_INT_TYPE :: my_blacs_ctxt, sc_desc(9), info, nprow, npcol ++ ++ ! The Matrix ++ MATRIX_TYPE, allocatable :: a(:,:), as(:,:) ++ ! eigenvectors ++ MATRIX_TYPE, allocatable :: z(:,:) ++ ! eigenvalues ++ EV_TYPE, allocatable :: ev(:) ++ ++ TEST_INT_TYPE :: status ++ integer(kind=c_int) :: error_elpa ++ ++ type(output_t) :: write_to_file ++ class(elpa_t), pointer :: e ++ class(elpa_autotune_t), pointer :: tune_state ++ ++ TEST_INT_TYPE :: iter ++ character(len=5) :: iter_string ++ ++ call read_input_parameters(na, nev, nblk, write_to_file) ++! 
call setup_mpi(myid, nprocs)
++  call MPI_INIT_THREAD(MPI_THREAD_SERIALIZED,info, mpierr)
++  call MPI_COMM_SIZE(MPI_COMM_WORLD,nprocs, mpierr)
++  call MPI_COMM_RANK(MPI_COMM_WORLD,myid, mpierr)
++#ifdef HAVE_REDIRECT
++#ifdef WITH_MPI
++  call MPI_BARRIER(MPI_COMM_WORLD, mpierr)
++  call redirect_stdout(myid)
++#endif
++#endif
++
++  if (elpa_init(CURRENT_API_VERSION) /= ELPA_OK) then
++    print *, "ELPA API version not supported"
++    stop 1
++  endif
++
++  layout = 'C'
++  do np_cols = NINT(SQRT(REAL(nprocs))),2,-1
++    if(mod(nprocs,np_cols) == 0 ) exit
++  enddo
++  np_rows = nprocs/np_cols
++  assert(nprocs == np_rows * np_cols)
++
++  if (myid == 0) then
++    print '((a,i0))', 'Matrix size: ', na
++    print '((a,i0))', 'Num eigenvectors: ', nev
++    print '((a,i0))', 'Blocksize: ', nblk
++#ifdef WITH_MPI
++    print '((a,i0))', 'Num MPI proc: ', nprocs
++    print '(3(a,i0))','Number of processor rows=',np_rows,', cols=',np_cols,', total=',nprocs
++    print '(a)', 'Process layout: ' // layout
++#endif
++    print *,''
++  endif
++
++  call set_up_blacsgrid(int(mpi_comm_world,kind=BLAS_KIND), np_rows, np_cols, layout, &
++                        my_blacs_ctxt, my_prow, my_pcol)
++
++  call set_up_blacs_descriptor(na, nblk, my_prow, my_pcol, np_rows, np_cols, &
++                               na_rows, na_cols, sc_desc, my_blacs_ctxt, info)
++
++  allocate(a (na_rows,na_cols))
++  allocate(as(na_rows,na_cols))
++  allocate(z (na_rows,na_cols))
++  allocate(ev(na))
++
++  a(:,:) = 0.0
++  z(:,:) = 0.0
++  ev(:) = 0.0
++
++  call prepare_matrix_analytic(na, a, nblk, myid, np_rows, np_cols, my_prow, my_pcol, print_times=.false.)
++  as(:,:) = a(:,:)
++
++  e => elpa_allocate(error_elpa)
++  assert_elpa_ok(error_elpa)
++
++  call e%set("na", int(na,kind=c_int), error_elpa)
++  assert_elpa_ok(error_elpa)
++  call e%set("nev", int(nev,kind=c_int), error_elpa)
++  assert_elpa_ok(error_elpa)
++  call e%set("local_nrows", int(na_rows,kind=c_int), error_elpa)
++  assert_elpa_ok(error_elpa)
++  call e%set("local_ncols", int(na_cols,kind=c_int), error_elpa)
++  assert_elpa_ok(error_elpa)
++  call e%set("nblk", int(nblk,kind=c_int), error_elpa)
++  assert_elpa_ok(error_elpa)
++
++  if (layout .eq. 'C') then
++    call e%set("matrix_order",COLUMN_MAJOR_ORDER,error_elpa)
++  else
++    call e%set("matrix_order",ROW_MAJOR_ORDER,error_elpa)
++  endif
++
++#ifdef WITH_MPI
++  call e%set("mpi_comm_parent", int(MPI_COMM_WORLD,kind=c_int), error_elpa)
++  assert_elpa_ok(error_elpa)
++  call e%set("process_row", int(my_prow,kind=c_int), error_elpa)
++  assert_elpa_ok(error_elpa)
++  call e%set("process_col", int(my_pcol,kind=c_int), error_elpa)
++  assert_elpa_ok(error_elpa)
++#endif
++  call e%set("timings",1, error_elpa)
++  assert_elpa_ok(error_elpa)
++
++  call e%set("debug",1, error_elpa)
++  assert_elpa_ok(error_elpa)
++#if TEST_NVIDIA_GPU == 1 || (TEST_NVIDIA_GPU == 0) && (TEST_AMD_GPU == 0) && (TEST_INTEL_GPU == 0)
++  call e%set("nvidia-gpu", 0, error_elpa)
++  assert_elpa_ok(error_elpa)
++#endif
++#if TEST_AMD_GPU == 1
++  call e%set("amd-gpu", 0, error_elpa)
++  assert_elpa_ok(error_elpa)
++#endif
++#if TEST_INTEL_GPU == 1
++  call e%set("intel-gpu", 0, error_elpa)
++  assert_elpa_ok(error_elpa)
++#endif
++
++  !call e%set("max_stored_rows", 15, error_elpa)
++
++  !call e%set("solver", ELPA_SOLVER_2STAGE, error_elpa)
++
++  assert_elpa_ok(e%setup())
++
++  if (myid == 0) print *, ""
++
++  ! if you want to use the new autotuning implementation
++  !call e%autotune_set_api_version(20211125, error_elpa)
++  !assert_elpa_ok(error_elpa)
++  ! if you want to use the old one, either do not set autotune_set_api_version
++  ! or set autotune_set_api_version to a supported api version < 20211125
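++  ! (editor's note) For orientation, the autotuning workflow below is:
++  !   1. autotune_setup() creates a tuning object for a given search space
++  !      (here ELPA_AUTOTUNE_MEDIUM) and domain (real or complex);
++  !   2. each autotune_step() selects the next parameter combination, the
++  !      caller solves the problem once, and that step's timing is recorded;
++  !   3. autotune_set_best() fixes the fastest combination on the object.
++  ! The loop additionally prints and saves the tuning state every iteration,
++  ! so a long tuning run can be inspected after the fact.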
++  tune_state => e%autotune_setup(ELPA_AUTOTUNE_MEDIUM, AUTOTUNE_DOMAIN, error_elpa)
++  assert_elpa_ok(error_elpa)
++
++  iter=0
++  do while (e%autotune_step(tune_state, error_elpa))
++    assert_elpa_ok(error_elpa)
++    iter=iter+1
++    write(iter_string,'(I5.5)') iter
++    !call e%print_settings()
++    !call e%store_settings("saved_parameters_"//trim(iter_string)//".txt")
++    call e%timer_start("eigenvectors: iteration "//trim(iter_string))
++    call e%eigenvectors(a, ev, z, error_elpa)
++    assert_elpa_ok(error_elpa)
++    call e%timer_stop("eigenvectors: iteration "//trim(iter_string))
++
++    assert_elpa_ok(error_elpa)
++    if (myid .eq. 0) then
++      print *, ""
++      call e%print_times("eigenvectors: iteration "//trim(iter_string))
++    endif
++    status = check_correctness_analytic(na, nev, ev, z, nblk, myid, np_rows, np_cols, my_prow, my_pcol, &
++                                        .true., .true., print_times=.false.)
++    a(:,:) = as(:,:)
++    call e%autotune_print_state(tune_state)
++    call e%autotune_save_state(tune_state, "saved_state_"//trim(iter_string)//".txt")
++  end do
++
++  !! set and print the autotuned settings
++  call e%autotune_set_best(tune_state, error_elpa)
++  assert_elpa_ok(error_elpa)
++  if (myid .eq. 0) then
++    flush(output_unit)
++    print *, "The best combination found by the autotuning:"
++    call e%autotune_print_best(tune_state, error_elpa)
++    assert_elpa_ok(error_elpa)
++  endif
++  ! de-allocate autotune object
++  call elpa_autotune_deallocate(tune_state, error_elpa)
++  assert_elpa_ok(error_elpa)
++
++  if (myid .eq. 0) then
++    print *, "Running once more with the best setting found..."
++  endif
++  call e%timer_start("eigenvectors: best setting")
++  call e%eigenvectors(a, ev, z, error_elpa)
++  assert_elpa_ok(error_elpa)
++  call e%timer_stop("eigenvectors: best setting")
++  assert_elpa_ok(error_elpa)
++  if (myid .eq. 0) then
++    ! print *, ""
++    call e%print_times("eigenvectors: best setting")
++  endif
++  status = check_correctness_analytic(na, nev, ev, z, nblk, myid, np_rows, np_cols, my_prow, my_pcol, &
++                                      .true., .true., print_times=.false.)
++
++  call elpa_deallocate(e,error_elpa)
++  assert_elpa_ok(error_elpa)
++
++  deallocate(a)
++  deallocate(as)
++  deallocate(z)
++  deallocate(ev)
++
++  call elpa_uninit(error_elpa)
++  assert_elpa_ok(error_elpa)
++
++#ifdef WITH_MPI
++  call blacs_gridexit(my_blacs_ctxt)
++  call mpi_finalize(mpierr)
++#endif
++
++  call exit(status)
++
++end program
+diff -ruN elpa-new_release_2021.11.001/examples/Fortran/test.F90 elpa-new_release_2021.11.001_ok/examples/Fortran/test.F90
+--- elpa-new_release_2021.11.001/examples/Fortran/test.F90	1970-01-01 01:00:00.000000000 +0100
++++ elpa-new_release_2021.11.001_ok/examples/Fortran/test.F90	2022-01-28 12:00:32.452129948 +0100
+@@ -0,0 +1,1207 @@
++! This file is part of ELPA.
++!
++! The ELPA library was originally created by the ELPA consortium,
++! consisting of the following organizations:
++!
++! - Max Planck Computing and Data Facility (MPCDF), formerly known as
++!   Rechenzentrum Garching der Max-Planck-Gesellschaft (RZG),
++! - Bergische Universität Wuppertal, Lehrstuhl für angewandte
++!   Informatik,
++! - Technische Universität München, Lehrstuhl für Informatik mit
++!   Schwerpunkt Wissenschaftliches Rechnen ,
++! - Fritz-Haber-Institut, Berlin, Abt. Theorie,
++! - Max-Plack-Institut für Mathematik in den Naturwissenschaften,
++!   Leipzig, Abt. Komplexe Strukutren in Biologie und Kognition,
++!   and
++! - IBM Deutschland GmbH
++!
++!
++! More information can be found here:
++! 
http://elpa.mpcdf.mpg.de/ ++! ++! ELPA is free software: you can redistribute it and/or modify ++! it under the terms of the version 3 of the license of the ++! GNU Lesser General Public License as published by the Free ++! Software Foundation. ++! ++! ELPA is distributed in the hope that it will be useful, ++! but WITHOUT ANY WARRANTY; without even the implied warranty of ++! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++! GNU Lesser General Public License for more details. ++! ++! You should have received a copy of the GNU Lesser General Public License ++! along with ELPA. If not, see <http://www.gnu.org/licenses/> ++! ++! ELPA reflects a substantial effort on the part of the original ++! ELPA consortium, and we ask you to respect the spirit of the ++! license that we chose: i.e., please contribute any changes you ++! may have back to the original ELPA library distribution, and keep ++! any derivatives of ELPA under the same license that we chose for ++! the original distribution, the GNU Lesser General Public License. ++! ++! ++#include "config-f90.h" ++ ++! Define one of TEST_REAL or TEST_COMPLEX ++! Define one of TEST_SINGLE or TEST_DOUBLE ++! Define one of TEST_SOLVER_1STAGE or TEST_SOLVER_2STAGE ++! Define TEST_NVIDIA_GPU \in [0, 1] ++! Define TEST_INTEL_GPU \in [0, 1] ++! Define TEST_AMD_GPU \in [0, 1] ++! Define either TEST_ALL_KERNELS or a TEST_KERNEL \in [any valid kernel] ++ ++#if !(defined(TEST_REAL) ^ defined(TEST_COMPLEX)) ++error: define exactly one of TEST_REAL or TEST_COMPLEX ++#endif ++ ++#if !(defined(TEST_SINGLE) ^ defined(TEST_DOUBLE)) ++error: define exactly one of TEST_SINGLE or TEST_DOUBLE ++#endif ++ ++#if !(defined(TEST_SOLVER_1STAGE) ^ defined(TEST_SOLVER_2STAGE) ^ defined(TEST_SCALAPACK_ALL) ^ defined(TEST_SCALAPACK_PART)) ++error: define exactly one of TEST_SOLVER_1STAGE or TEST_SOLVER_2STAGE or TEST_SCALAPACK_ALL or TEST_SCALAPACK_PART ++#endif ++ ++#ifdef TEST_SOLVER_1STAGE ++#ifdef TEST_ALL_KERNELS ++error: TEST_ALL_KERNELS cannot be defined for TEST_SOLVER_1STAGE ++#endif ++#ifdef TEST_KERNEL ++error: TEST_KERNEL cannot be defined for TEST_SOLVER_1STAGE ++#endif ++#endif ++ ++#ifdef TEST_SOLVER_2STAGE ++#if !(defined(TEST_KERNEL) ^ defined(TEST_ALL_KERNELS)) ++error: define either TEST_ALL_KERNELS or a valid TEST_KERNEL ++#endif ++#endif ++ ++#ifdef TEST_GENERALIZED_DECOMP_EIGENPROBLEM ++#define TEST_GENERALIZED_EIGENPROBLEM ++#endif ++ ++#ifdef TEST_SINGLE ++# define EV_TYPE real(kind=C_FLOAT) ++# ifdef TEST_REAL ++# define MATRIX_TYPE real(kind=C_FLOAT) ++# else ++# define MATRIX_TYPE complex(kind=C_FLOAT_COMPLEX) ++# endif ++#else ++# define EV_TYPE real(kind=C_DOUBLE) ++# ifdef TEST_REAL ++# define MATRIX_TYPE real(kind=C_DOUBLE) ++# else ++# define MATRIX_TYPE complex(kind=C_DOUBLE_COMPLEX) ++# endif ++#endif ++ ++#ifdef TEST_REAL ++#define KERNEL_KEY "real_kernel" ++#endif ++#ifdef TEST_COMPLEX ++#define KERNEL_KEY "complex_kernel" ++#endif ++ ++#ifdef HAVE_64BIT_INTEGER_MATH_SUPPORT ++#define TEST_INT_TYPE integer(kind=c_int64_t) ++#define INT_TYPE c_int64_t ++#else ++#define TEST_INT_TYPE integer(kind=c_int32_t) ++#define INT_TYPE c_int32_t ++#endif ++#ifdef HAVE_64BIT_INTEGER_MPI_SUPPORT ++#define TEST_INT_MPI_TYPE integer(kind=c_int64_t) ++#define INT_MPI_TYPE c_int64_t ++#else ++#define TEST_INT_MPI_TYPE integer(kind=c_int32_t) ++#define INT_MPI_TYPE c_int32_t ++#endif ++ ++#define TEST_GPU 0 ++#if (TEST_NVIDIA_GPU == 1) || (TEST_AMD_GPU == 1) || (TEST_INTEL_GPU == 1) ++#undef TEST_GPU ++#define TEST_GPU 1 ++#endif ++ ++#include 
"assert.h" ++ ++program test ++ use elpa ++ !use test_util ++ use test_setup_mpi ++ use test_prepare_matrix ++ use test_read_input_parameters ++ use test_blacs_infrastructure ++ use test_check_correctness ++ use test_analytic ++#ifdef WITH_SCALAPACK_TESTS ++ use test_scalapack ++#endif ++ ++#ifdef HAVE_REDIRECT ++ use test_redirect ++#endif ++#ifdef WITH_OPENMP_TRADITIONAL ++ use omp_lib ++#endif ++ use precision_for_tests ++ ++#if TEST_GPU_DEVICE_POINTER_API == 1 ++ use test_gpu ++#if TEST_NVIDIA_GPU == 1 ++ use test_cuda_functions ++#endif ++#if TEST_AMD_GPU == 1 ++ use test_hip_functions ++#endif ++ ++#endif /* TEST_GPU_DEVICE_POINTER_API */ ++ ++ implicit none ++ ++ ! matrix dimensions ++ TEST_INT_TYPE :: na, nev, nblk ++ ++ ! mpi ++ TEST_INT_TYPE :: myid, nprocs ++ TEST_INT_MPI_TYPE :: myidMPI, nprocsMPI ++ TEST_INT_TYPE :: na_cols, na_rows ! local matrix size ++ TEST_INT_TYPE :: np_cols, np_rows ! number of MPI processes per column/row ++ TEST_INT_TYPE :: my_prow, my_pcol ! local MPI task position (my_prow, my_pcol) in the grid (0..np_cols -1, 0..np_rows -1) ++ TEST_INT_MPI_TYPE :: mpierr ++ ++ ! blacs ++ TEST_INT_TYPE :: my_blacs_ctxt, sc_desc(9), info, nprow, npcol ++ ++ ! The Matrix ++ MATRIX_TYPE, allocatable, target :: a(:,:) ++ MATRIX_TYPE, allocatable :: as(:,:) ++#if defined(TEST_HERMITIAN_MULTIPLY) ++ MATRIX_TYPE, allocatable :: b(:,:), c(:,:) ++#endif ++#if defined(TEST_GENERALIZED_EIGENPROBLEM) ++ MATRIX_TYPE, allocatable :: b(:,:), bs(:,:) ++#endif ++ ! eigenvectors ++ MATRIX_TYPE, allocatable, target :: z(:,:) ++ ! eigenvalues ++ EV_TYPE, allocatable, target :: ev(:) ++ ++#if TEST_GPU_DEVICE_POINTER_API == 1 ++ type(c_ptr) :: a_dev, q_dev, ev_dev ++#endif ++ ++ ++ logical :: check_all_evals, skip_check_correctness ++ ++#if defined(TEST_MATRIX_TOEPLITZ) || defined(TEST_MATRIX_FRANK) ++ EV_TYPE, allocatable :: d(:), sd(:), ds(:), sds(:) ++ EV_TYPE :: diagonalELement, subdiagonalElement ++#endif ++ ++ TEST_INT_TYPE :: status ++ integer(kind=c_int) :: error_elpa ++ ++ type(output_t) :: write_to_file ++ class(elpa_t), pointer :: e ++#ifdef TEST_ALL_KERNELS ++ TEST_INT_TYPE :: i ++#endif ++#ifdef TEST_ALL_LAYOUTS ++ TEST_INT_TYPE :: i_layout ++#ifdef BUILD_FUGAKU ++ character(len=1) :: layouts(2) ++#else ++ character(len=1), parameter :: layouts(2) = [ 'C', 'R' ] ++#endif ++#endif ++ integer(kind=c_int):: kernel ++ character(len=1) :: layout ++ logical :: do_test_numeric_residual, do_test_numeric_residual_generalized, & ++ do_test_analytic_eigenvalues, & ++ do_test_analytic_eigenvalues_eigenvectors, & ++ do_test_frank_eigenvalues, & ++ do_test_toeplitz_eigenvalues, do_test_cholesky, & ++ do_test_hermitian_multiply ++ logical :: ignoreError, success, successGPU ++#ifdef WITH_OPENMP_TRADITIONAL ++ TEST_INT_TYPE :: max_threads, threads_caller ++#endif ++#if TEST_GPU_SET_ID == 1 ++ TEST_INT_TYPE :: gpuID ++#endif ++#ifdef SPLIT_COMM_MYSELF ++ TEST_INT_MPI_TYPE :: mpi_comm_rows, mpi_comm_cols, mpi_string_length, mpierr2 ++ character(len=MPI_MAX_ERROR_STRING) :: mpierr_string ++#endif ++ ++ ++#if TEST_GPU_DEVICE_POINTER_API == 1 ++#if TEST_REAL == 1 ++#if TEST_DOUBLE ++ integer(kind=c_intptr_t), parameter :: size_of_datatype = size_of_double_real ++#endif ++#if TEST_SINGLE ++ integer(kind=c_intptr_t), parameter :: size_of_datatype = size_of_single_real ++#endif ++#endif /* TEST_REAL == 1 */ ++ ++#if TEST_COMPLEX == 1 ++#if TEST_DOUBLE ++ integer(kind=c_intptr_t), parameter :: size_of_datatype = size_of_double_complex ++#endif ++#if TEST_SINGLE ++ integer(kind=c_intptr_t), parameter 
:: size_of_datatype = size_of_single_complex
++#endif
++#endif
++#endif /* TEST_GPU_DEVICE_POINTER_API == 1 */
++
++#ifdef TEST_ALL_LAYOUTS
++#ifdef BUILD_FUGAKU
++  layouts(1) = 'C'
++  layouts(2) = 'R'
++#endif
++#endif
++
++  ignoreError = .false.
++
++  call read_input_parameters_traditional(na, nev, nblk, write_to_file, skip_check_correctness)
++!  call setup_mpi(myid, nprocs)
++  call MPI_INIT_THREAD(MPI_THREAD_SERIALIZED,info, mpierr)
++  call MPI_COMM_SIZE(MPI_COMM_WORLD,nprocs, mpierr)
++  call MPI_COMM_RANK(MPI_COMM_WORLD,myid, mpierr)
++
++#ifdef HAVE_REDIRECT
++#ifdef WITH_MPI
++  call MPI_BARRIER(MPI_COMM_WORLD, mpierr)
++  call redirect_stdout(myid)
++#endif
++#endif
++
++  check_all_evals = .true.
++
++
++  do_test_numeric_residual = .false.
++  do_test_numeric_residual_generalized = .false.
++  do_test_analytic_eigenvalues = .false.
++  do_test_analytic_eigenvalues_eigenvectors = .false.
++  do_test_frank_eigenvalues = .false.
++  do_test_toeplitz_eigenvalues = .false.
++
++  do_test_cholesky = .false.
++#if defined(TEST_CHOLESKY)
++  do_test_cholesky = .true.
++#endif
++  do_test_hermitian_multiply = .false.
++#if defined(TEST_HERMITIAN_MULTIPLY)
++  do_test_hermitian_multiply = .true.
++#endif
++
++  status = 0
++  if (elpa_init(CURRENT_API_VERSION) /= ELPA_OK) then
++    print *, "ELPA API version not supported"
++    stop 1
++  endif
++
++  if (myid == 0) then
++    print '((a,i0))', 'Program ' // TEST_CASE
++    print *, ""
++  endif
++
++#ifdef TEST_ALL_LAYOUTS
++  do i_layout = 1, size(layouts)    ! layouts
++    layout = layouts(i_layout)
++    do np_cols = 1, nprocs          ! factors
++      if (mod(nprocs,np_cols) /= 0 ) then
++        cycle
++      endif
++#else
++  layout = 'C'
++  do np_cols = NINT(SQRT(REAL(nprocs))),2,-1
++    if(mod(nprocs,np_cols) == 0 ) exit
++  enddo
++#endif
++
++  np_rows = nprocs/np_cols
++  assert(nprocs == np_rows * np_cols)
++
++  if (myid == 0) then
++    print '((a,i0))', 'Matrix size: ', na
++    print '((a,i0))', 'Num eigenvectors: ', nev
++    print '((a,i0))', 'Blocksize: ', nblk
++#ifdef WITH_MPI
++    print '((a,i0))', 'Num MPI proc: ', nprocs
++    print '(3(a,i0))','Number of processor rows=',np_rows,', cols=',np_cols,', total=',nprocs
++    print '(a)', 'Process layout: ' // layout
++#endif
++    print *,''
++  endif
++
++#if TEST_QR_DECOMPOSITION == 1
++
++#if (TEST_NVIDIA_GPU == 1) || (TEST_INTEL_GPU == 1) || (TEST_AMD_GPU == 1)
++#ifdef WITH_MPI
++  call mpi_finalize(mpierr)
++#endif
++  stop 77
++#endif /* TEST_NVIDIA_GPU || TEST_INTEL_GPU */
++  if (nblk .lt. 64) then
++    if (myid .eq. 0) then
++      print *,"At the moment the QR decomposition needs a blocksize of at least 64"
++    endif
++    if ((na .lt. 64) .and. (myid .eq. 0)) then
++      print *,"This is why the matrix size must also be at least 64, or only 1 MPI task can be used"
++    endif
++
++#ifdef WITH_MPI
++    call mpi_finalize(mpierr)
++#endif
++    stop 77
++  endif
++#endif /* TEST_QR_DECOMPOSITION */
++
++
++  call set_up_blacsgrid(int(mpi_comm_world,kind=BLAS_KIND), np_rows, &
++                        np_cols, layout, my_blacs_ctxt, my_prow, &
++                        my_pcol)
++
++
++#if defined(TEST_GENERALIZED_EIGENPROBLEM) && defined(TEST_ALL_LAYOUTS)
++#ifdef WITH_MPI
++  call mpi_finalize(mpierr)
++#endif
++  stop 77
++#endif
++
++
++  call set_up_blacs_descriptor(na, nblk, my_prow, my_pcol, &
++                               np_rows, np_cols, &
++                               na_rows, na_cols, sc_desc, my_blacs_ctxt, info)
++
++  allocate(a (na_rows,na_cols))
++  allocate(as(na_rows,na_cols))
++  allocate(z (na_rows,na_cols))
++  allocate(ev(na))
++
++#ifdef TEST_HERMITIAN_MULTIPLY
++  allocate(b (na_rows,na_cols))
++  allocate(c (na_rows,na_cols))
++#endif
++
++#ifdef TEST_GENERALIZED_EIGENPROBLEM
++  allocate(b (na_rows,na_cols))
++  allocate(bs (na_rows,na_cols))
++#endif
++
++#if defined(TEST_MATRIX_TOEPLITZ) || defined(TEST_MATRIX_FRANK)
++  allocate(d (na), ds(na))
++  allocate(sd (na), sds(na))
++#endif
++
++  a(:,:) = 0.0
++  z(:,:) = 0.0
++  ev(:) = 0.0
++
++#if defined(TEST_MATRIX_RANDOM) && !defined(TEST_SOLVE_TRIDIAGONAL) && !defined(TEST_CHOLESKY) && !defined(TEST_EIGENVALUES)
++  ! the random matrix can be used in almost all tests; but for some no
++  ! correctness checks have been implemented; do not allow these
++  ! combinations
++  ! RANDOM + TEST_SOLVE_TRIDIAGONAL: we need a TOEPLITZ MATRIX
++  ! RANDOM + TEST_CHOLESKY: we need an SPD matrix
++  ! RANDOM + TEST_EIGENVALUES: no correctness check known
++
++  ! We also have to take care of the special case in TEST_EIGENVECTORS
++#if !defined(TEST_EIGENVECTORS)
++  call prepare_matrix_random(na, myid, sc_desc, a, z, as)
++#else /* TEST_EIGENVECTORS */
++  if (nev .ge. 1) then
++    call prepare_matrix_random(na, myid, sc_desc, a, z, as)
++#ifndef TEST_HERMITIAN_MULTIPLY
++    do_test_numeric_residual = .true.
++#endif
++  else
++    if (myid .eq. 0) then
++      print *,"At the moment the random matrix requires nev >= 1"
++    endif
++#ifdef WITH_MPI
++    call mpi_finalize(mpierr)
++#endif
++    stop 77
++  endif
++#endif /* TEST_EIGENVECTORS */
++  do_test_analytic_eigenvalues = .false.
++  do_test_analytic_eigenvalues_eigenvectors = .false.
++  do_test_frank_eigenvalues = .false.
++  do_test_toeplitz_eigenvalues = .false.
++#endif /* (TEST_MATRIX_RANDOM) */
++
++#if defined(TEST_MATRIX_RANDOM) && defined(TEST_CHOLESKY)
++  call prepare_matrix_random_spd(na, myid, sc_desc, a, z, as, &
++                                 nblk, np_rows, np_cols, my_prow, my_pcol)
++  do_test_analytic_eigenvalues = .false.
++  do_test_analytic_eigenvalues_eigenvectors = .false.
++  do_test_frank_eigenvalues = .false.
++  do_test_toeplitz_eigenvalues = .false.
++#endif /* TEST_MATRIX_RANDOM and TEST_CHOLESKY */
++
++#if defined(TEST_MATRIX_RANDOM) && defined(TEST_GENERALIZED_EIGENPROBLEM)
++  ! call prepare_matrix_random(na, myid, sc_desc, a, z, as)
++  call prepare_matrix_random_spd(na, myid, sc_desc, b, z, bs, &
++                                 nblk, np_rows, np_cols, my_prow, my_pcol)
++  do_test_analytic_eigenvalues = .false.
++  do_test_analytic_eigenvalues_eigenvectors = .false.
++  do_test_frank_eigenvalues = .false.
++  do_test_toeplitz_eigenvalues = .false.
++  do_test_numeric_residual = .false.
++  do_test_numeric_residual_generalized = .true.
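++  ! (editor's note) Background for the switch to the generalized residual
++  ! check above: in this configuration the problem being solved is
++  ! A*z = lambda*B*z, and b is prepared as a random symmetric/Hermitian
++  ! positive definite matrix precisely so that B admits a Cholesky
++  ! factorization and the generalized problem is well posed; the standard
++  ! residual ||A*z - lambda*z|| would not apply here.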
++#endif /* TEST_MATRIX_RANDOM and TEST_GENERALIZED_EIGENPROBLEM */
++
++#if defined(TEST_MATRIX_RANDOM) && (defined(TEST_SOLVE_TRIDIAGONAL) || defined(TEST_EIGENVALUES))
++#error "Random matrix is not allowed in this configuration"
++#endif
++
++#if defined(TEST_MATRIX_ANALYTIC) && !defined(TEST_SOLVE_TRIDIAGONAL) && !defined(TEST_CHOLESKY)
++  ! the analytic matrix can be used in almost all tests; but for some no
++  ! correctness checks have been implemented; do not allow these
++  ! combinations
++  ! ANALYTIC + TEST_SOLVE_TRIDIAGONAL: we need a TOEPLITZ MATRIX
++  ! ANALYTIC + TEST_CHOLESKY: no correctness check yet implemented
++
++  call prepare_matrix_analytic(na, a, nblk, myid, np_rows, np_cols, my_prow, my_pcol)
++  as(:,:) = a
++
++  do_test_numeric_residual = .false.
++  do_test_analytic_eigenvalues_eigenvectors = .false.
++#ifndef TEST_HERMITIAN_MULTIPLY
++  do_test_analytic_eigenvalues = .true.
++#endif
++#if defined(TEST_EIGENVECTORS)
++  if (nev .ge. 1) then
++    do_test_analytic_eigenvalues_eigenvectors = .true.
++    do_test_analytic_eigenvalues = .false.
++  else
++    do_test_analytic_eigenvalues_eigenvectors = .false.
++  endif
++#endif
++  do_test_frank_eigenvalues = .false.
++  do_test_toeplitz_eigenvalues = .false.
++#endif /* TEST_MATRIX_ANALYTIC */
++#if defined(TEST_MATRIX_ANALYTIC) && (defined(TEST_SOLVE_TRIDIAGONAL) || defined(TEST_CHOLESKY))
++#error "Analytic matrix is not allowed in this configuration"
++#endif
++
++#if defined(TEST_MATRIX_TOEPLITZ)
++  ! The Toeplitz matrix works in each test
++#ifdef TEST_SINGLE
++  diagonalElement = 0.45_c_float
++  subdiagonalElement = 0.78_c_float
++#else
++  diagonalElement = 0.45_c_double
++  subdiagonalElement = 0.78_c_double
++#endif
++
++! actually we test cholesky for diagonal matrix only
++#if defined(TEST_CHOLESKY)
++#ifdef TEST_SINGLE
++  diagonalElement = (2.546_c_float, 0.0_c_float)
++  subdiagonalElement = (0.0_c_float, 0.0_c_float)
++#else
++  diagonalElement = (2.546_c_double, 0.0_c_double)
++  subdiagonalElement = (0.0_c_double, 0.0_c_double)
++#endif
++#endif /* TEST_CHOLESKY */
++
++  ! check first whether to abort
++  if (na < 10) then
++#ifdef WITH_MPI
++    call mpi_finalize(mpierr)
++#endif
++    stop 77
++  endif
++  call prepare_matrix_toeplitz(na, diagonalElement, subdiagonalElement, &
++                               d, sd, ds, sds, a, as, nblk, np_rows, &
++                               np_cols, my_prow, my_pcol)
++
++
++  do_test_numeric_residual = .false.
++#if defined(TEST_EIGENVECTORS)
++  if (nev .ge. 1) then
++    do_test_numeric_residual = .true.
++  else
++    do_test_numeric_residual = .false.
++  endif
++#endif
++
++  do_test_analytic_eigenvalues = .false.
++  do_test_analytic_eigenvalues_eigenvectors = .false.
++  do_test_frank_eigenvalues = .false.
++#if defined(TEST_CHOLESKY)
++  do_test_toeplitz_eigenvalues = .false.
++#else
++  do_test_toeplitz_eigenvalues = .true.
++#endif
++
++#endif /* TEST_MATRIX_TOEPLITZ */
++
++
++#if defined(TEST_MATRIX_FRANK) && !defined(TEST_SOLVE_TRIDIAGONAL) && !defined(TEST_CHOLESKY)
++  ! the Frank matrix can be used in almost all tests; but for some no
++  ! correctness checks have been implemented; do not allow these
++  ! combinations
++  ! FRANK + TEST_SOLVE_TRIDIAGONAL: we need a TOEPLITZ MATRIX
++  ! FRANK + TEST_CHOLESKY: no correctness check yet implemented
++
++  ! We also have to take care of the special case in TEST_EIGENVECTORS
++#if !defined(TEST_EIGENVECTORS)
++  call prepare_matrix_frank(na, a, z, as, nblk, np_rows, np_cols, my_prow, my_pcol)
++
++  do_test_analytic_eigenvalues = .false.
++  do_test_analytic_eigenvalues_eigenvectors = .false.
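++  ! (editor's note) On the do_test_toeplitz_eigenvalues check enabled in the
++  ! Toeplitz branch above: a symmetric tridiagonal Toeplitz matrix with
++  ! diagonal element d and off-diagonal element s has the closed-form
++  ! spectrum
++  !   lambda_k = d + 2*s*cos(k*pi/(na+1)),  k = 1, ..., na
++  ! which is why the computed eigenvalues can be verified there without a
++  ! reference solver.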
++#ifndef TEST_HERMITIAN_MULTIPLY
++  do_test_frank_eigenvalues = .true.
++#endif
++  do_test_toeplitz_eigenvalues = .false.
++
++#else /* TEST_EIGENVECTORS */
++
++  if (nev .ge. 1) then
++    call prepare_matrix_frank(na, a, z, as, nblk, np_rows, np_cols, my_prow, my_pcol)
++
++    do_test_analytic_eigenvalues = .false.
++    do_test_analytic_eigenvalues_eigenvectors = .false.
++#ifndef TEST_HERMITIAN_MULTIPLY
++    do_test_frank_eigenvalues = .true.
++#endif
++    do_test_toeplitz_eigenvalues = .false.
++    do_test_numeric_residual = .false.
++  else
++    do_test_analytic_eigenvalues = .false.
++    do_test_analytic_eigenvalues_eigenvectors = .false.
++#ifndef TEST_HERMITIAN_MULTIPLY
++    do_test_frank_eigenvalues = .true.
++#endif
++    do_test_toeplitz_eigenvalues = .false.
++    do_test_numeric_residual = .false.
++
++  endif
++
++#endif /* TEST_EIGENVECTORS */
++#endif /* (TEST_MATRIX_FRANK) */
++#if defined(TEST_MATRIX_FRANK) && (defined(TEST_SOLVE_TRIDIAGONAL) || defined(TEST_CHOLESKY))
++#error "FRANK matrix is not allowed in this configuration"
++#endif
++
++
++#ifdef TEST_HERMITIAN_MULTIPLY
++#ifdef TEST_REAL
++
++#ifdef TEST_DOUBLE
++  b(:,:) = 2.0_c_double * a(:,:)
++  c(:,:) = 0.0_c_double
++#else
++  b(:,:) = 2.0_c_float * a(:,:)
++  c(:,:) = 0.0_c_float
++#endif
++
++#endif /* TEST_REAL */
++
++#ifdef TEST_COMPLEX
++
++#ifdef TEST_DOUBLE
++  b(:,:) = 2.0_c_double * a(:,:)
++  c(:,:) = (0.0_c_double, 0.0_c_double)
++#else
++  b(:,:) = 2.0_c_float * a(:,:)
++  c(:,:) = (0.0_c_float, 0.0_c_float)
++#endif
++
++#endif /* TEST_COMPLEX */
++
++#endif /* TEST_HERMITIAN_MULTIPLY */
++
++! if the test is used for (repeated) performance tests, one might want to skip the checking
++! of the results, which might be time-consuming and not necessary.
++  if(skip_check_correctness) then
++    do_test_numeric_residual = .false.
++    do_test_numeric_residual_generalized = .false.
++    do_test_analytic_eigenvalues = .false.
++    do_test_analytic_eigenvalues_eigenvectors = .false.
++    do_test_frank_eigenvalues = .false.
++    do_test_toeplitz_eigenvalues = .false.
++    do_test_cholesky = .false.
++  endif
++
++
++#ifdef WITH_OPENMP_TRADITIONAL
++  threads_caller = omp_get_max_threads()
++  if (myid == 0) then
++    print *,"The calling program uses ",threads_caller," threads"
++  endif
++#endif
++
++  e => elpa_allocate(error_elpa)
++  assert_elpa_ok(error_elpa)
++
++  call e%set("na", int(na,kind=c_int), error_elpa)
++  assert_elpa_ok(error_elpa)
++  call e%set("nev", int(nev,kind=c_int), error_elpa)
++  assert_elpa_ok(error_elpa)
++  call e%set("local_nrows", int(na_rows,kind=c_int), error_elpa)
++  assert_elpa_ok(error_elpa)
++  call e%set("local_ncols", int(na_cols,kind=c_int), error_elpa)
++  assert_elpa_ok(error_elpa)
++  call e%set("nblk", int(nblk,kind=c_int), error_elpa)
++  assert_elpa_ok(error_elpa)
++
++  if (layout .eq. 'C') then
++    call e%set("matrix_order",COLUMN_MAJOR_ORDER,error_elpa)
++  else
++    call e%set("matrix_order",ROW_MAJOR_ORDER,error_elpa)
++  endif
++
++#ifdef WITH_MPI
++#ifdef SPLIT_COMM_MYSELF
++  call mpi_comm_split(MPI_COMM_WORLD, int(my_pcol,kind=MPI_KIND), int(my_prow,kind=MPI_KIND), &
++                      mpi_comm_rows, mpierr)
++  if (mpierr .ne. MPI_SUCCESS) then
++    call MPI_ERROR_STRING(mpierr, mpierr_string, mpi_string_length, mpierr2)
++    write(error_unit,*) "MPI ERROR occurred during mpi_comm_split for row communicator: ", trim(mpierr_string)
++    stop 1
++  endif
++
++  call mpi_comm_split(MPI_COMM_WORLD, int(my_prow,kind=MPI_KIND), int(my_pcol,kind=MPI_KIND), &
++                      mpi_comm_cols, mpierr)
++  if (mpierr .ne. MPI_SUCCESS) then
++    call MPI_ERROR_STRING(mpierr,mpierr_string, mpi_string_length, mpierr2)
++    write(error_unit,*) "MPI ERROR occurred during mpi_comm_split for col communicator: ", trim(mpierr_string)
++    stop 1
++  endif
++
++  call e%set("mpi_comm_parent", int(MPI_COMM_WORLD,kind=c_int), error_elpa)
++  assert_elpa_ok(error_elpa)
++  call e%set("mpi_comm_rows", int(mpi_comm_rows,kind=c_int), error_elpa)
++  assert_elpa_ok(error_elpa)
++  call e%set("mpi_comm_cols", int(mpi_comm_cols,kind=c_int), error_elpa)
++  assert_elpa_ok(error_elpa)
++
++#else
++  call e%set("mpi_comm_parent", int(MPI_COMM_WORLD,kind=c_int), error_elpa)
++  assert_elpa_ok(error_elpa)
++  call e%set("process_row", int(my_prow,kind=c_int), error_elpa)
++  assert_elpa_ok(error_elpa)
++  call e%set("process_col", int(my_pcol,kind=c_int), error_elpa)
++  assert_elpa_ok(error_elpa)
++  call e%set("verbose", 1, error_elpa)
++  assert_elpa_ok(error_elpa)
++#endif
++#endif
++#ifdef TEST_GENERALIZED_EIGENPROBLEM
++  call e%set("blacs_context", int(my_blacs_ctxt,kind=c_int), error_elpa)
++  assert_elpa_ok(error_elpa)
++#endif
++  call e%set("timings", 1_ik, error_elpa)
++  assert_elpa_ok(e%setup())
++
++#ifdef TEST_SOLVER_1STAGE
++  call e%set("solver", ELPA_SOLVER_1STAGE, error_elpa)
++#else
++  call e%set("solver", ELPA_SOLVER_2STAGE, error_elpa)
++#endif
++  assert_elpa_ok(error_elpa)
++
++#if TEST_NVIDIA_GPU == 1
++  call e%set("nvidia-gpu", TEST_GPU, error_elpa)
++  assert_elpa_ok(error_elpa)
++#endif
++
++#if TEST_AMD_GPU == 1
++  call e%set("amd-gpu", TEST_GPU, error_elpa)
++  assert_elpa_ok(error_elpa)
++#endif
++
++#if TEST_INTEL_GPU == 1
++  call e%set("intel-gpu", TEST_GPU, error_elpa)
++  assert_elpa_ok(error_elpa)
++#endif
++
++#if (TEST_GPU_SET_ID == 1) && (TEST_INTEL_GPU == 0)
++  ! simple test
++  ! Can (and should) fail often
++  !gpuID = mod(myid,2)
++  gpuID = mod(myid,1)
++  call e%set("use_gpu_id", int(gpuID,kind=c_int), error_elpa)
++  assert_elpa_ok(error_elpa)
++#endif
++
++#if TEST_GPU_DEVICE_POINTER_API == 1
++#if defined(TEST_EIGENVECTORS) && defined(TEST_MATRIX_RANDOM)
++  ! create device pointers for a, q, ev; then copy a to the device
++#if TEST_NVIDIA_GPU == 1
++  if (gpu_vendor(NVIDIA_GPU) == NVIDIA_GPU) then
++    call set_gpu_parameters()
++  endif
++#endif
++#if TEST_AMD_GPU == 1
++  if (gpu_vendor(AMD_GPU) == AMD_GPU) then
++    call set_gpu_parameters()
++  endif
++#endif
++
++  ! set device
++  success = .true.
++#if TEST_NVIDIA_GPU == 1
++  success = cuda_setdevice(gpuID)
++#endif
++#if TEST_AMD_GPU == 1
++  success = cuda_setdevice(gpuID)
++#endif
++  if (.not.(success)) then
++    print *,"Cannot set GPU device. Aborting..."
++    stop
++  endif
++
++  ! malloc
++  successGPU = gpu_malloc(a_dev, na_rows*na_cols*size_of_datatype)
++  if (.not.(successGPU)) then
++    print *,"Cannot allocate matrix a on GPU! Aborting..."
++    stop
++  endif
++  successGPU = gpu_malloc(q_dev, na_rows*na_cols*size_of_datatype)
++  if (.not.(successGPU)) then
++    print *,"Cannot allocate matrix q on GPU! Aborting..."
++    stop
++  endif
++  successGPU = gpu_malloc(ev_dev, na*size_of_datatype)
++  if (.not.(successGPU)) then
++    print *,"Cannot allocate vector of eigenvalues on GPU! Aborting..."
++    stop
++  endif
++
++  successGPU = gpu_memcpy(a_dev, c_loc(a), na_rows*na_cols*size_of_datatype, &
++                          gpuMemcpyHostToDevice)
++  if (.not.(successGPU)) then
++    print *,"Cannot copy matrix a to GPU! Aborting..." 
++ stop ++ endif ++#endif ++#endif /* TEST_GPU_DEVICE_POINTER_API */ ++ ++#if TEST_QR_DECOMPOSITION == 1 ++ call e%set("qr", 1_ik, error_elpa) ++ assert_elpa_ok(error_elpa) ++#endif ++ ++#ifdef WITH_OPENMP_TRADITIONAL ++ max_threads=omp_get_max_threads() ++ call e%set("omp_threads", int(max_threads,kind=c_int), error_elpa) ++ assert_elpa_ok(error_elpa) ++#endif ++ ++ if (myid == 0) print *, "" ++ ++#ifdef TEST_ALL_KERNELS ++ do i = 0, elpa_option_cardinality(KERNEL_KEY) ! kernels ++#if (TEST_NVIDIA_GPU == 0) && (TEST_INTEL_GPU == 0) && (TEST_AMD_GPU == 0) ++ !if (TEST_GPU .eq. 0) then ++ kernel = elpa_option_enumerate(KERNEL_KEY, int(i,kind=c_int)) ++ if (kernel .eq. ELPA_2STAGE_REAL_NVIDIA_GPU) continue ++ if (kernel .eq. ELPA_2STAGE_COMPLEX_NVIDIA_GPU) continue ++ if (kernel .eq. ELPA_2STAGE_REAL_AMD_GPU) continue ++ if (kernel .eq. ELPA_2STAGE_COMPLEX_AMD_GPU) continue ++ if (kernel .eq. ELPA_2STAGE_REAL_INTEL_GPU) continue ++ if (kernel .eq. ELPA_2STAGE_COMPLEX_INTEL_GPU) continue ++ !endif ++#endif ++#endif ++ ++#ifdef TEST_KERNEL ++ kernel = TEST_KERNEL ++#endif ++ ++#ifdef TEST_SOLVER_2STAGE ++#if TEST_NVIDIA_GPU == 1 ++#if defined TEST_REAL ++#if (TEST_NVIDIA_GPU == 1) ++#if WITH_NVIDIA_GPU_SM80_COMPUTE_CAPABILITY == 1 ++ kernel = ELPA_2STAGE_REAL_NVIDIA_SM80_GPU ++#else ++ kernel = ELPA_2STAGE_REAL_NVIDIA_GPU ++#endif ++#endif /* TEST_NVIDIA_GPU */ ++#if (TEST_AMD_GPU == 1) ++ kernel = ELPA_2STAGE_REAL_AMD_GPU ++#endif ++#if (TEST_INTEL_GPU == 1) ++ kernel = ELPA_2STAGE_REAL_INTEL_GPU ++#endif ++#endif /* TEST_REAL */ ++ ++#if defined TEST_COMPLEX ++#if (TEST_NVIDIA_GPU == 1) ++ kernel = ELPA_2STAGE_COMPLEX_NVIDIA_GPU ++#endif ++#if (TEST_AMD_GPU == 1) ++ kernel = ELPA_2STAGE_COMPLEX_AMD_GPU ++#endif ++#if (TEST_INTEL_GPU == 1) ++ kernel = ELPA_2STAGE_COMPLEX_INTEL_GPU ++#endif ++#endif /* TEST_COMPLEX */ ++#endif /* TEST_GPU == 1 */ ++ ++ ++ call e%set(KERNEL_KEY, kernel, error_elpa) ++#ifdef TEST_KERNEL ++ assert_elpa_ok(error_elpa) ++#else ++ if (error_elpa /= ELPA_OK) then ++ cycle ++ endif ++ ! actually used kernel might be different if forced via environment variables ++ call e%get(KERNEL_KEY, kernel, error_elpa) ++ assert_elpa_ok(error_elpa) ++#endif ++ if (myid == 0) then ++ print *, elpa_int_value_to_string(KERNEL_KEY, kernel) // " kernel" ++ endif ++#endif ++ ++#if !defined(TEST_ALL_LAYOUTS) ++! print all parameters ++ call e%print_settings(error_elpa) ++ assert_elpa_ok(error_elpa) ++#endif ++ ++#ifdef TEST_ALL_KERNELS ++ call e%timer_start(elpa_int_value_to_string(KERNEL_KEY, kernel)) ++#endif ++ ++ ! The actual solve step ++#if defined(TEST_EIGENVECTORS) ++#if TEST_QR_DECOMPOSITION == 1 ++ call e%timer_start("e%eigenvectors_qr()") ++#else ++ call e%timer_start("e%eigenvectors()") ++#endif ++#ifdef TEST_SCALAPACK_ALL ++ call solve_scalapack_all(na, a, sc_desc, ev, z) ++#elif TEST_SCALAPACK_PART ++ call solve_scalapack_part(na, a, sc_desc, nev, ev, z) ++ check_all_evals = .false. ! 
scalapack does not compute all eigenvectors ++#else /* TEST_SCALAPACK_PART */ ++#ifdef TEST_EXPLICIT_NAME ++#if defined(TEST_REAL) ++#if defined(TEST_DOUBLE) ++#if (TEST_GPU_DEVICE_POINTER_API == 1) && defined(TEST_MATRIX_RANDOM) && defined(TEST_EIGENVECTORS) ++ call e%eigenvectors_double(a_dev, ev_dev, q_dev, error_elpa) ++#else ++ call e%eigenvectors_double(a, ev, z, error_elpa) ++#endif ++#endif /* TEST_DOUBLE */ ++#if defined(TEST_SINGLE) ++#if (TEST_GPU_DEVICE_POINTER_API == 1) && defined(TEST_MATRIX_RANDOM) && defined(TEST_EIGENVECTORS) ++ call e%eigenvectors_float(a_dev, ev_dev, q_dev, error_elpa) ++#else ++ call e%eigenvectors_float(a, ev, z, error_elpa) ++#endif ++#endif /* TEST_SINGLE */ ++#endif /* TEST_REAL */ ++#if defined(TEST_COMPLEX) ++#if defined(TEST_DOUBLE) ++#if (TEST_GPU_DEVICE_POINTER_API == 1) && defined(TEST_MATRIX_RANDOM) && defined(TEST_EIGENVECTORS) ++ call e%eigenvectors_double_complex(a_dev, ev_dev, q_dev, error_elpa) ++#else ++ call e%eigenvectors_double_complex(a, ev, z, error_elpa) ++#endif ++#endif /* TEST_DOUBLE */ ++#if defined(TEST_SINGLE) ++#if (TEST_GPU_DEVICE_POINTER_API == 1) && defined(TEST_MATRIX_RANDOM) && defined(TEST_EIGENVECTORS) ++ call e%eigenvectors_float_complex(a_dev, ev_dev, q_dev, error_elpa) ++#else ++ call e%eigenvectors_float_complex(a, ev, z, error_elpa) ++#endif ++#endif /* TEST_SINGLE */ ++#endif /* TEST_COMPLEX */ ++#else /* TEST_EXPLICIT_NAME */ ++ call e%eigenvectors(a, ev, z, error_elpa) ++#endif /* TEST_EXPLICIT_NAME */ ++#endif /* TEST_SCALAPACK_PART */ ++#if TEST_QR_DECOMPOSITION == 1 ++ call e%timer_stop("e%eigenvectors_qr()") ++#else ++ call e%timer_stop("e%eigenvectors()") ++#endif ++#endif /* TEST_EIGENVECTORS */ ++ ++#ifdef TEST_EIGENVALUES ++ call e%timer_start("e%eigenvalues()") ++#ifdef TEST_EXPLICIT_NAME ++#if defined(TEST_REAL) ++#if defined(TEST_DOUBLE) ++ call e%eigenvalues_double(a, ev, error_elpa) ++#endif ++#if defined(TEST_SINGLE) ++ call e%eigenvalues_float(a, ev, error_elpa) ++#endif ++#endif /* TEST_REAL */ ++#if defined(TEST_COMPLEX) ++#if defined(TEST_DOUBLE) ++ call e%eigenvalues_double_complex(a, ev, error_elpa) ++#endif ++#if defined(TEST_SINGLE) ++ call e%eigenvalues_float_complex(a, ev, error_elpa) ++#endif ++#endif ++#else /* TEST_EXPLICIT_NAME */ ++ call e%eigenvalues(a, ev, error_elpa) ++#endif /* TEST_EXPLICIT_NAME */ ++ call e%timer_stop("e%eigenvalues()") ++#endif ++ ++#if defined(TEST_SOLVE_TRIDIAGONAL) ++ call e%timer_start("e%solve_tridiagonal()") ++ call e%solve_tridiagonal(d, sd, z, error_elpa) ++ call e%timer_stop("e%solve_tridiagonal()") ++ ev(:) = d(:) ++#endif ++ ++#if defined(TEST_CHOLESKY) ++ call e%timer_start("e%cholesky()") ++ call e%cholesky(a, error_elpa) ++ assert_elpa_ok(error_elpa) ++ call e%timer_stop("e%cholesky()") ++#endif ++ ++#if defined(TEST_HERMITIAN_MULTIPLY) ++ call e%timer_start("e%hermitian_multiply()") ++ call e%hermitian_multiply('F','F', int(na,kind=c_int), a, b, int(na_rows,kind=c_int), & ++ int(na_cols,kind=c_int), c, int(na_rows,kind=c_int), & ++ int(na_cols,kind=c_int), error_elpa) ++ call e%timer_stop("e%hermitian_multiply()") ++#endif ++ ++#if defined(TEST_GENERALIZED_EIGENPROBLEM) ++ call e%timer_start("e%generalized_eigenvectors()") ++#if defined(TEST_GENERALIZED_DECOMP_EIGENPROBLEM) ++ call e%timer_start("is_already_decomposed=.false.") ++#endif ++ call e%generalized_eigenvectors(a, b, ev, z, .false., error_elpa) ++#if defined(TEST_GENERALIZED_DECOMP_EIGENPROBLEM) ++ call e%timer_stop("is_already_decomposed=.false.") ++ a = as ++ call 
e%timer_start("is_already_decomposed=.true.") ++ call e%generalized_eigenvectors(a, b, ev, z, .true., error_elpa) ++ call e%timer_stop("is_already_decomposed=.true.") ++#endif ++ call e%timer_stop("e%generalized_eigenvectors()") ++#endif ++ ++ assert_elpa_ok(error_elpa) ++ ++#ifdef TEST_ALL_KERNELS ++ call e%timer_stop(elpa_int_value_to_string(KERNEL_KEY, kernel)) ++#endif ++ ++ if (myid .eq. 0) then ++#ifdef TEST_ALL_KERNELS ++ call e%print_times(elpa_int_value_to_string(KERNEL_KEY, kernel)) ++#else /* TEST_ALL_KERNELS */ ++ ++#if defined(TEST_EIGENVECTORS) ++#if TEST_QR_DECOMPOSITION == 1 ++ call e%print_times("e%eigenvectors_qr()") ++#else ++ call e%print_times("e%eigenvectors()") ++#endif ++#endif ++#ifdef TEST_EIGENVALUES ++ call e%print_times("e%eigenvalues()") ++#endif ++#ifdef TEST_SOLVE_TRIDIAGONAL ++ call e%print_times("e%solve_tridiagonal()") ++#endif ++#ifdef TEST_CHOLESKY ++ call e%print_times("e%cholesky()") ++#endif ++#ifdef TEST_HERMITIAN_MULTIPLY ++ call e%print_times("e%hermitian_multiply()") ++#endif ++#ifdef TEST_GENERALIZED_EIGENPROBLEM ++ call e%print_times("e%generalized_eigenvectors()") ++#endif ++#endif /* TEST_ALL_KERNELS */ ++ endif ++ ++ ++ ++ ++#if TEST_GPU_DEVICE_POINTER_API == 1 ++#if defined(TEST_EIGENVECTORS) && defined(TEST_MATRIX_RANDOM) ++ ! copy for testing ++ successGPU = gpu_memcpy(c_loc(z), q_dev, na_rows*na_cols*size_of_datatype, & ++ gpuMemcpyDeviceToHost) ++ if (.not.(successGPU)) then ++ print *,"cannot copy matrix of eigenvectors from GPU to host! Aborting..." ++ stop ++ endif ++ ++ successGPU = gpu_memcpy(c_loc(ev), ev_dev, na*& ++#ifdef TEST_DOUBLE ++ size_of_double_real, & ++#endif ++#ifdef TEST_SINGLE ++ size_of_single_real, & ++#endif ++ gpuMemcpyDeviceToHost) ++ if (.not.(successGPU)) then ++ print *,"cannot copy vector of eigenvalues from GPU to host! Aborting..." ++ stop ++ endif ++ ++ ! and deallocate device pointer ++ successGPU = gpu_free(a_dev) ++ if (.not.(successGPU)) then ++ print *,"cannot free memory of a_dev on GPU. Aborting..." ++ stop ++ endif ++ successGPU = gpu_free(q_dev) ++ if (.not.(successGPU)) then ++ print *,"cannot free memory of q_dev on GPU. Aborting..." ++ stop ++ endif ++ successGPU = gpu_free(ev_dev) ++ if (.not.(successGPU)) then ++ print *,"cannot free memory of ev_dev on GPU. Aborting..." ++ stop ++ endif ++#endif ++#endif ++ ++ ++ if (do_test_analytic_eigenvalues) then ++ status = check_correctness_analytic(na, nev, ev, z, nblk, myid, np_rows, np_cols, & ++ my_prow, my_pcol, check_all_evals, .false.) ++ call check_status(status, myid) ++ endif ++ ++ if (do_test_analytic_eigenvalues_eigenvectors) then ++ status = check_correctness_analytic(na, nev, ev, z, nblk, myid, np_rows, np_cols, & ++ my_prow, my_pcol, check_all_evals, .true.) 
++ call check_status(status, myid) ++ endif ++ ++ if(do_test_numeric_residual) then ++ status = check_correctness_evp_numeric_residuals(na, nev, as, z, ev, sc_desc, nblk, myid, & ++ np_rows,np_cols, my_prow, my_pcol) ++ call check_status(status, myid) ++ endif ++ ++ if (do_test_frank_eigenvalues) then ++ status = check_correctness_eigenvalues_frank(na, ev, z, myid) ++ call check_status(status, myid) ++ endif ++ ++ if (do_test_toeplitz_eigenvalues) then ++#if defined(TEST_EIGENVALUES) || defined(TEST_SOLVE_TRIDIAGONAL) ++ status = check_correctness_eigenvalues_toeplitz(na, diagonalElement, & ++ subdiagonalElement, ev, z, myid) ++ call check_status(status, myid) ++#endif ++ endif ++ ++ if (do_test_cholesky) then ++ status = check_correctness_cholesky(na, a, as, na_rows, sc_desc, myid ) ++ call check_status(status, myid) ++ endif ++ ++#ifdef TEST_HERMITIAN_MULTIPLY ++ if (do_test_hermitian_multiply) then ++ status = check_correctness_hermitian_multiply(na, a, b, c, na_rows, sc_desc, myid ) ++ call check_status(status, myid) ++ endif ++#endif ++ ++#ifdef TEST_GENERALIZED_EIGENPROBLEM ++ if(do_test_numeric_residual_generalized) then ++ status = check_correctness_evp_numeric_residuals(na, nev, as, z, ev, sc_desc, nblk, myid, np_rows, & ++ np_cols, my_prow, & ++ my_pcol, bs) ++ call check_status(status, myid) ++ endif ++#endif ++ ++ ++#ifdef WITH_OPENMP_TRADITIONAL ++ if (threads_caller .ne. omp_get_max_threads()) then ++ if (myid .eq. 0) then ++ print *, " ERROR! the number of OpenMP threads has not been restored correctly" ++ endif ++ status = 1 ++ endif ++#endif ++ if (myid == 0) then ++ print *, "" ++ endif ++ ++#ifdef TEST_ALL_KERNELS ++ a(:,:) = as(:,:) ++#if defined(TEST_MATRIX_TOEPLITZ) || defined(TEST_MATRIX_FRANK) ++ d = ds ++ sd = sds ++#endif ++ end do ! kernels ++#endif ++ ++ call elpa_deallocate(e, error_elpa) ++ assert_elpa_ok(error_elpa) ++ ++ deallocate(a) ++ deallocate(as) ++ deallocate(z) ++ deallocate(ev) ++ ++#ifdef TEST_HERMITIAN_MULTIPLY ++ deallocate(b) ++ deallocate(c) ++#endif ++#if defined(TEST_MATRIX_TOEPLITZ) || defined(TEST_MATRIX_FRANK) ++ deallocate(d, ds) ++ deallocate(sd, sds) ++#endif ++#if defined(TEST_GENERALIZED_EIGENPROBLEM) ++ deallocate(b, bs) ++#endif ++ ++#ifdef TEST_ALL_LAYOUTS ++ end do ! factors ++ end do ! layouts ++#endif ++ call elpa_uninit(error_elpa) ++ assert_elpa_ok(error_elpa) ++ ++#ifdef WITH_MPI ++ call blacs_gridexit(my_blacs_ctxt) ++ call mpi_finalize(mpierr) ++#endif ++ ++ call exit(status) ++ ++ contains ++ ++ subroutine check_status(status, myid) ++ implicit none ++ TEST_INT_TYPE, intent(in) :: status, myid ++ TEST_INT_MPI_TYPE :: mpierr ++ if (status /= 0) then ++ if (myid == 0) print *, "Result incorrect!" ++#ifdef WITH_MPI ++ call mpi_finalize(mpierr) ++#endif ++ call exit(status) ++ endif ++ end subroutine ++ ++end program +diff -ruN elpa-new_release_2021.11.001/examples/Fortran/test_multiple_objs.F90 elpa-new_release_2021.11.001_ok/examples/Fortran/test_multiple_objs.F90 +--- elpa-new_release_2021.11.001/examples/Fortran/test_multiple_objs.F90 1970-01-01 01:00:00.000000000 +0100 ++++ elpa-new_release_2021.11.001_ok/examples/Fortran/test_multiple_objs.F90 2022-01-28 09:48:29.071140954 +0100 +@@ -0,0 +1,411 @@ ++! This file is part of ELPA. ++! ++! The ELPA library was originally created by the ELPA consortium, ++! consisting of the following organizations: ++! ++! - Max Planck Computing and Data Facility (MPCDF), formerly known as ++! Rechenzentrum Garching der Max-Planck-Gesellschaft (RZG), ++! 
- Bergische Universität Wuppertal, Lehrstuhl für angewandte ++! Informatik, ++! - Technische Universität München, Lehrstuhl für Informatik mit ++! Schwerpunkt Wissenschaftliches Rechnen , ++! - Fritz-Haber-Institut, Berlin, Abt. Theorie, ++! - Max-Plack-Institut für Mathematik in den Naturwissenschaften, ++! Leipzig, Abt. Komplexe Strukutren in Biologie und Kognition, ++! and ++! - IBM Deutschland GmbH ++! ++! ++! More information can be found here: ++! http://elpa.mpcdf.mpg.de/ ++! ++! ELPA is free software: you can redistribute it and/or modify ++! it under the terms of the version 3 of the license of the ++! GNU Lesser General Public License as published by the Free ++! Software Foundation. ++! ++! ELPA is distributed in the hope that it will be useful, ++! but WITHOUT ANY WARRANTY; without even the implied warranty of ++! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++! GNU Lesser General Public License for more details. ++! ++! You should have received a copy of the GNU Lesser General Public License ++! along with ELPA. If not, see <http://www.gnu.org/licenses/> ++! ++! ELPA reflects a substantial effort on the part of the original ++! ELPA consortium, and we ask you to respect the spirit of the ++! license that we chose: i.e., please contribute any changes you ++! may have back to the original ELPA library distribution, and keep ++! any derivatives of ELPA under the same license that we chose for ++! the original distribution, the GNU Lesser General Public License. ++! ++! ++#include "config-f90.h" ++ ++! Define one of TEST_REAL or TEST_COMPLEX ++! Define one of TEST_SINGLE or TEST_DOUBLE ++! Define one of TEST_SOLVER_1STAGE or TEST_SOLVER_2STAGE ++! Define TEST_NVIDIA_GPU \in [0, 1] ++! Define TEST_INTEL_GPU \in [0, 1] ++! Define TEST_AMD_GPU \in [0, 1] ++! 
Define either TEST_ALL_KERNELS or a TEST_KERNEL \in [any valid kernel] ++ ++#if !(defined(TEST_REAL) ^ defined(TEST_COMPLEX)) ++error: define exactly one of TEST_REAL or TEST_COMPLEX ++#endif ++ ++#if !(defined(TEST_SINGLE) ^ defined(TEST_DOUBLE)) ++error: define exactly one of TEST_SINGLE or TEST_DOUBLE ++#endif ++ ++#ifdef TEST_SINGLE ++# define EV_TYPE real(kind=C_FLOAT) ++# ifdef TEST_REAL ++# define MATRIX_TYPE real(kind=C_FLOAT) ++# else ++# define MATRIX_TYPE complex(kind=C_FLOAT_COMPLEX) ++# endif ++#else ++# define EV_TYPE real(kind=C_DOUBLE) ++# ifdef TEST_REAL ++# define MATRIX_TYPE real(kind=C_DOUBLE) ++# else ++# define MATRIX_TYPE complex(kind=C_DOUBLE_COMPLEX) ++# endif ++#endif ++ ++ ++#ifdef TEST_REAL ++# define AUTOTUNE_DOMAIN ELPA_AUTOTUNE_DOMAIN_REAL ++#else ++# define AUTOTUNE_DOMAIN ELPA_AUTOTUNE_DOMAIN_COMPLEX ++#endif ++ ++#ifdef HAVE_64BIT_INTEGER_MATH_SUPPORT ++#define TEST_INT_TYPE integer(kind=c_int64_t) ++#define INT_TYPE c_int64_t ++#else ++#define TEST_INT_TYPE integer(kind=c_int32_t) ++#define INT_TYPE c_int32_t ++#endif ++ ++#ifdef HAVE_64BIT_INTEGER_MPI_SUPPORT ++#define TEST_INT_MPI_TYPE integer(kind=c_int64_t) ++#define INT_MPI_TYPE c_int64_t ++#else ++#define TEST_INT_MPI_TYPE integer(kind=c_int32_t) ++#define INT_MPI_TYPE c_int32_t ++#endif ++ ++#define TEST_GPU 0 ++#if (TEST_NVIDIA_GPU == 1) || (TEST_AMD_GPU == 1) || (TEST_INTEL_GPU == 1) ++#undef TEST_GPU ++#define TEST_GPU 1 ++#endif ++ ++#include "assert.h" ++ ++program test ++ use elpa ++ ++ !use test_util ++ use test_setup_mpi ++ use test_prepare_matrix ++ use test_read_input_parameters ++ use test_blacs_infrastructure ++ use test_check_correctness ++ use test_analytic ++ use iso_fortran_env ++ ++#ifdef HAVE_REDIRECT ++ use test_redirect ++#endif ++ implicit none ++ ++ ! matrix dimensions ++ TEST_INT_TYPE :: na, nev, nblk ++ ++ ! mpi ++ TEST_INT_TYPE :: myid, nprocs ++ TEST_INT_TYPE :: na_cols, na_rows ! local matrix size ++ TEST_INT_TYPE :: np_cols, np_rows ! number of MPI processes per column/row ++ TEST_INT_TYPE :: my_prow, my_pcol ! local MPI task position (my_prow, my_pcol) in the grid (0..np_cols -1, 0..np_rows -1) ++ TEST_INT_TYPE :: ierr ++ TEST_INT_MPI_TYPE :: mpierr ++ ! blacs ++ character(len=1) :: layout ++ TEST_INT_TYPE :: my_blacs_ctxt, sc_desc(9), info, nprow, npcol ++ ++ ! The Matrix ++ MATRIX_TYPE, allocatable :: a(:,:), as(:,:) ++ ! eigenvectors ++ MATRIX_TYPE, allocatable :: z(:,:) ++ ! eigenvalues ++ EV_TYPE, allocatable :: ev(:) ++ ++ TEST_INT_TYPE :: status ++ integer(kind=c_int) :: error_elpa ++ ++ type(output_t) :: write_to_file ++ class(elpa_t), pointer :: e1, e2, e_ptr ++ class(elpa_autotune_t), pointer :: tune_state ++ ++ TEST_INT_TYPE :: iter ++ character(len=5) :: iter_string ++ integer(kind=c_int) :: timings, debug, gpu ++ ++ call read_input_parameters(na, nev, nblk, write_to_file) ++! 
call setup_mpi(myid, nprocs) ++ call MPI_INIT_THREAD(MPI_THREAD_SERIALIZED,info, mpierr) ++ call MPI_COMM_SIZE(MPI_COMM_WORLD,nprocs, mpierr) ++ call MPI_COMM_RANK(MPI_COMM_WORLD,myid, mpierr) ++#ifdef HAVE_REDIRECT ++#ifdef WITH_MPI ++ call MPI_BARRIER(MPI_COMM_WORLD, mpierr) ++ call redirect_stdout(myid) ++#endif ++#endif ++ ++ if (elpa_init(CURRENT_API_VERSION) /= ELPA_OK) then ++ print *, "ELPA API version not supported" ++ stop 1 ++ endif ++ ++ layout = 'C' ++ do np_cols = NINT(SQRT(REAL(nprocs))),2,-1 ++ if(mod(nprocs,np_cols) == 0 ) exit ++ enddo ++ np_rows = nprocs/np_cols ++ assert(nprocs == np_rows * np_cols) ++ ++ if (myid == 0) then ++ print '((a,i0))', 'Matrix size: ', na ++ print '((a,i0))', 'Num eigenvectors: ', nev ++ print '((a,i0))', 'Blocksize: ', nblk ++#ifdef WITH_MPI ++ print '((a,i0))', 'Num MPI proc: ', nprocs ++ print '(3(a,i0))','Number of processor rows=',np_rows,', cols=',np_cols,', total=',nprocs ++ print '(a)', 'Process layout: ' // layout ++#endif ++ print *,'' ++ endif ++ ++ call set_up_blacsgrid(int(mpi_comm_world,kind=BLAS_KIND), np_rows, np_cols, layout, & ++ my_blacs_ctxt, my_prow, my_pcol) ++ ++ call set_up_blacs_descriptor(na, nblk, my_prow, my_pcol, np_rows, np_cols, & ++ na_rows, na_cols, sc_desc, my_blacs_ctxt, info) ++ ++ allocate(a (na_rows,na_cols)) ++ allocate(as(na_rows,na_cols)) ++ allocate(z (na_rows,na_cols)) ++ allocate(ev(na)) ++ ++ a(:,:) = 0.0 ++ z(:,:) = 0.0 ++ ev(:) = 0.0 ++ ++ call prepare_matrix_analytic(na, a, nblk, myid, np_rows, np_cols, my_prow, my_pcol, print_times=.false.) ++ as(:,:) = a(:,:) ++ ++ e1 => elpa_allocate(error_elpa) ++ !assert_elpa_ok(error_elpa) ++ ++ call set_basic_params(e1, na, nev, na_rows, na_cols, my_prow, my_pcol) ++ ++ call e1%set("timings",1, error_elpa) ++ assert_elpa_ok(error_elpa) ++ ++ call e1%set("debug",1, error_elpa) ++ assert_elpa_ok(error_elpa) ++ ++#if TEST_NVIDIA_GPU == 1 || (TEST_NVIDIA_GPU == 0) && (TEST_AMD_GPU == 0) && (TEST_INTEL_GPU == 0) ++ call e1%set("nvidia-gpu", TEST_GPU, error_elpa) ++ assert_elpa_ok(error_elpa) ++#endif ++ ++#if TEST_AMD_GPU == 1 ++ call e1%set("amd-gpu", TEST_GPU, error_elpa) ++ assert_elpa_ok(error_elpa) ++#endif ++ ++#if TEST_INTEL_GPU == 1 ++ call e1%set("intel-gpu", TEST_GPU, error_elpa) ++ assert_elpa_ok(error_elpa) ++#endif ++ !call e1%set("max_stored_rows", 15, error_elpa) ++ ++ assert_elpa_ok(e1%setup()) ++ ++ call e1%store_settings("initial_parameters.txt", error_elpa) ++ assert_elpa_ok(error_elpa) ++ ++#ifdef WITH_MPI ++ ! barrier after store settings, file created from one MPI rank only, but loaded everywhere ++ call MPI_BARRIER(MPI_COMM_WORLD, mpierr) ++#endif ++ ++ ! try to load parameters into another object ++ e2 => elpa_allocate(error_elpa) ++ assert_elpa_ok(error_elpa) ++ ++ call set_basic_params(e2, na, nev, na_rows, na_cols, my_prow, my_pcol) ++ call e2%load_settings("initial_parameters.txt", error_elpa) ++ assert_elpa_ok(error_elpa) ++ ++ assert_elpa_ok(e2%setup()) ++ ++ call e2%get("timings", timings, error_elpa) ++ assert_elpa_ok(error_elpa) ++ ++ call e2%get("debug", debug, error_elpa) ++ assert_elpa_ok(error_elpa) ++#if TEST_NVIDIA_GPU == 1 || (TEST_NVIDIA_GPU == 0) && (TEST_AMD_GPU == 0) && (TEST_INTEL_GPU == 0) ++ call e2%get("nvidia-gpu", gpu, error_elpa) ++ assert_elpa_ok(error_elpa) ++#endif ++#if TEST_AMD_GPU == 1 ++ call e2%get("amd-gpu", gpu, error_elpa) ++ assert_elpa_ok(error_elpa) ++#endif ++#if TEST_INTEL_GPU == 1 ++ call e2%get("intel-gpu", gpu, error_elpa) ++ assert_elpa_ok(error_elpa) ++#endif ++ ++ if ((timings .ne. 1) .or. 
(debug .ne. 1) .or. (gpu .ne. 0)) then
++ print *, "Parameters not stored or loaded correctly. Aborting...", timings, debug, gpu
++ stop 1
++ endif
++
++ if(myid == 0) print *, "parameters of e1"
++ call e1%print_settings(error_elpa)
++ assert_elpa_ok(error_elpa)
++
++ if(myid == 0) print *, ""
++ if(myid == 0) print *, "parameters of e2"
++ call e2%print_settings(error_elpa)
++ assert_elpa_ok(error_elpa)
++
++ e_ptr => e2
++
++
++ tune_state => e_ptr%autotune_setup(ELPA_AUTOTUNE_FAST, AUTOTUNE_DOMAIN, error_elpa)
++ assert_elpa_ok(error_elpa)
++
++
++ iter=0
++ do while (e_ptr%autotune_step(tune_state, error_elpa))
++ assert_elpa_ok(error_elpa)
++
++ iter=iter+1
++ write(iter_string,'(I5.5)') iter
++ call e_ptr%print_settings(error_elpa)
++ assert_elpa_ok(error_elpa)
++
++ call e_ptr%store_settings("saved_parameters_"//trim(iter_string)//".txt", error_elpa)
++ assert_elpa_ok(error_elpa)
++
++ call e_ptr%timer_start("eigenvectors: iteration "//trim(iter_string))
++ call e_ptr%eigenvectors(a, ev, z, error_elpa)
++ assert_elpa_ok(error_elpa)
++ call e_ptr%timer_stop("eigenvectors: iteration "//trim(iter_string))
++
++ assert_elpa_ok(error_elpa)
++ if (myid .eq. 0) then
++ print *, ""
++ call e_ptr%print_times("eigenvectors: iteration "//trim(iter_string))
++ endif
++ status = check_correctness_analytic(na, nev, ev, z, nblk, myid, np_rows, np_cols, my_prow, my_pcol, &
++ .true., .true., print_times=.false.)
++ a(:,:) = as(:,:)
++ call e_ptr%autotune_print_state(tune_state, error_elpa)
++ assert_elpa_ok(error_elpa)
++
++ call e_ptr%autotune_save_state(tune_state, "saved_state_"//trim(iter_string)//".txt", error_elpa)
++ assert_elpa_ok(error_elpa)
++#ifdef WITH_MPI
++ ! barrier after save state, file created from one MPI rank only, but loaded everywhere
++ call MPI_BARRIER(MPI_COMM_WORLD, mpierr)
++#endif
++ call e_ptr%autotune_load_state(tune_state, "saved_state_"//trim(iter_string)//".txt", error_elpa)
++ assert_elpa_ok(error_elpa)
++
++ end do
++
++ ! set and print the autotuned settings
++ call e_ptr%autotune_set_best(tune_state, error_elpa)
++ assert_elpa_ok(error_elpa)
++
++ if (myid .eq. 0) then
++ print *, "The best combination found by the autotuning:"
++ flush(output_unit)
++ call e_ptr%autotune_print_best(tune_state, error_elpa)
++ assert_elpa_ok(error_elpa)
++ endif
++ ! de-allocate autotune object
++ call elpa_autotune_deallocate(tune_state, error_elpa)
++ assert_elpa_ok(error_elpa)
++
++ if (myid .eq. 0) then
++ print *, "Running once more with the best found setting..."
++ endif
++ call e_ptr%timer_start("eigenvectors: best setting")
++ call e_ptr%eigenvectors(a, ev, z, error_elpa)
++ assert_elpa_ok(error_elpa)
++
++ call e_ptr%timer_stop("eigenvectors: best setting")
++ assert_elpa_ok(error_elpa)
++ if (myid .eq. 0) then
++ print *, ""
++ call e_ptr%print_times("eigenvectors: best setting")
++ endif
++ status = check_correctness_analytic(na, nev, ev, z, nblk, myid, np_rows, np_cols, my_prow, my_pcol, &
++ .true., .true., print_times=.false.)
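++
++ ! The autotuning workflow demonstrated above reduces to a small call pattern;
++ ! a minimal sketch, assuming an already set-up elpa_t object e and matrices
++ ! a, as, ev, z as in this test:
++ ! tune_state => e%autotune_setup(ELPA_AUTOTUNE_FAST, AUTOTUNE_DOMAIN, error_elpa)
++ ! do while (e%autotune_step(tune_state, error_elpa)) ! next parameter combination
++ ! call e%eigenvectors(a, ev, z, error_elpa) ! timed trial run
++ ! a(:,:) = as(:,:) ! restore the input matrix
++ ! end do
++ ! call e%autotune_set_best(tune_state, error_elpa) ! fix the fastest settings
++ ! call elpa_autotune_deallocate(tune_state, error_elpa)
++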
++ ++ call elpa_deallocate(e_ptr, error_elpa) ++ !assert_elpa_ok(error_elpa) ++ ++ deallocate(a) ++ deallocate(as) ++ deallocate(z) ++ deallocate(ev) ++ ++ call elpa_uninit(error_elpa) ++ !assert_elpa_ok(error_elpa) ++ ++#ifdef WITH_MPI ++ call blacs_gridexit(my_blacs_ctxt) ++ call mpi_finalize(mpierr) ++#endif ++ ++ call exit(status) ++ ++contains ++ subroutine set_basic_params(elpa, na, nev, na_rows, na_cols, my_prow, my_pcol) ++ implicit none ++ class(elpa_t), pointer :: elpa ++ TEST_INT_TYPE, intent(in) :: na, nev, na_rows, na_cols, my_prow, my_pcol ++ ++ call elpa%set("na", int(na,kind=c_int), error_elpa) ++ assert_elpa_ok(error_elpa) ++ call elpa%set("nev", int(nev,kind=c_int), error_elpa) ++ assert_elpa_ok(error_elpa) ++ call elpa%set("local_nrows", int(na_rows,kind=c_int), error_elpa) ++ assert_elpa_ok(error_elpa) ++ call elpa%set("local_ncols", int(na_cols,kind=c_int), error_elpa) ++ assert_elpa_ok(error_elpa) ++ call elpa%set("nblk", int(nblk,kind=c_int), error_elpa) ++ assert_elpa_ok(error_elpa) ++ ++#ifdef WITH_MPI ++ call elpa%set("mpi_comm_parent", int(MPI_COMM_WORLD,kind=c_int), error_elpa) ++ assert_elpa_ok(error_elpa) ++ call elpa%set("process_row", int(my_prow,kind=c_int), error_elpa) ++ assert_elpa_ok(error_elpa) ++ call elpa%set("process_col", int(my_pcol,kind=c_int), error_elpa) ++ assert_elpa_ok(error_elpa) ++#endif ++ end subroutine ++ ++end program +diff -ruN elpa-new_release_2021.11.001/examples/Fortran/test_skewsymmetric.F90 elpa-new_release_2021.11.001_ok/examples/Fortran/test_skewsymmetric.F90 +--- elpa-new_release_2021.11.001/examples/Fortran/test_skewsymmetric.F90 1970-01-01 01:00:00.000000000 +0100 ++++ elpa-new_release_2021.11.001_ok/examples/Fortran/test_skewsymmetric.F90 2022-01-28 09:50:11.240867016 +0100 +@@ -0,0 +1,431 @@ ++! This file is part of ELPA. ++! ++! The ELPA library was originally created by the ELPA consortium, ++! consisting of the following organizations: ++! ++! - Max Planck Computing and Data Facility (MPCDF), formerly known as ++! Rechenzentrum Garching der Max-Planck-Gesellschaft (RZG), ++! - Bergische Universität Wuppertal, Lehrstuhl für angewandte ++! Informatik, ++! - Technische Universität München, Lehrstuhl für Informatik mit ++! Schwerpunkt Wissenschaftliches Rechnen , ++! - Fritz-Haber-Institut, Berlin, Abt. Theorie, ++! - Max-Plack-Institut für Mathematik in den Naturwissenschaften, ++! Leipzig, Abt. Komplexe Strukutren in Biologie und Kognition, ++! and ++! - IBM Deutschland GmbH ++! ++! ++! More information can be found here: ++! http://elpa.mpcdf.mpg.de/ ++! ++! ELPA is free software: you can redistribute it and/or modify ++! it under the terms of the version 3 of the license of the ++! GNU Lesser General Public License as published by the Free ++! Software Foundation. ++! ++! ELPA is distributed in the hope that it will be useful, ++! but WITHOUT ANY WARRANTY; without even the implied warranty of ++! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++! GNU Lesser General Public License for more details. ++! ++! You should have received a copy of the GNU Lesser General Public License ++! along with ELPA. If not, see <http://www.gnu.org/licenses/> ++! ++! ELPA reflects a substantial effort on the part of the original ++! ELPA consortium, and we ask you to respect the spirit of the ++! license that we chose: i.e., please contribute any changes you ++! may have back to the original ELPA library distribution, and keep ++! any derivatives of ELPA under the same license that we chose for ++! 
the original distribution, the GNU Lesser General Public License. ++! ++! ++#include "config-f90.h" ++ ++! Define one of TEST_REAL or TEST_COMPLEX ++! Define one of TEST_SINGLE or TEST_DOUBLE ++! Define one of TEST_SOLVER_1STAGE or TEST_SOLVER_2STAGE ++! Define TEST_NVIDIA_GPU \in [0, 1] ++! Define TEST_INTEL_GPU \in [0, 1] ++! Define TEST_AMD_GPU \in [0, 1] ++! Define either TEST_ALL_KERNELS or a TEST_KERNEL \in [any valid kernel] ++ ++#if !(defined(TEST_REAL) ^ defined(TEST_COMPLEX)) ++error: define exactly one of TEST_REAL or TEST_COMPLEX ++#endif ++ ++#if !(defined(TEST_SINGLE) ^ defined(TEST_DOUBLE)) ++error: define exactly one of TEST_SINGLE or TEST_DOUBLE ++#endif ++ ++#ifdef TEST_SINGLE ++# define EV_TYPE real(kind=C_FLOAT) ++# define EV_TYPE_COMPLEX complex(kind=C_FLOAT_COMPLEX) ++# define MATRIX_TYPE_COMPLEX complex(kind=C_FLOAT_COMPLEX) ++# ifdef TEST_REAL ++# define MATRIX_TYPE real(kind=C_FLOAT) ++# else ++# define MATRIX_TYPE complex(kind=C_FLOAT_COMPLEX) ++# endif ++#else ++# define MATRIX_TYPE_COMPLEX complex(kind=C_DOUBLE_COMPLEX) ++# define EV_TYPE_COMPLEX complex(kind=C_DOUBLE_COMPLEX) ++# define EV_TYPE real(kind=C_DOUBLE) ++# ifdef TEST_REAL ++# define MATRIX_TYPE real(kind=C_DOUBLE) ++# else ++# define MATRIX_TYPE complex(kind=C_DOUBLE_COMPLEX) ++# endif ++#endif ++ ++#ifdef TEST_REAL ++# define AUTOTUNE_DOMAIN ELPA_AUTOTUNE_DOMAIN_REAL ++#else ++# define AUTOTUNE_DOMAIN ELPA_AUTOTUNE_DOMAIN_COMPLEX ++#endif ++#ifdef HAVE_64BIT_INTEGER_MATH_SUPPORT ++#define TEST_INT_TYPE integer(kind=c_int64_t) ++#define INT_TYPE c_int64_t ++#else ++#define TEST_INT_TYPE integer(kind=c_int32_t) ++#define INT_TYPE c_int32_t ++#endif ++#ifdef HAVE_64BIT_INTEGER_MPI_SUPPORT ++#define TEST_INT_MPI_TYPE integer(kind=c_int64_t) ++#define INT_MPI_TYPE c_int64_t ++#else ++#define TEST_INT_MPI_TYPE integer(kind=c_int32_t) ++#define INT_MPI_TYPE c_int32_t ++#endif ++ ++#define TEST_GPU 0 ++#if (TEST_NVIDIA_GPU == 1) || (TEST_AMD_GPU == 1) || (TEST_INTEL_GPU == 1) ++#undef TEST_GPU ++#define TEST_GPU 1 ++#endif ++ ++#include "assert.h" ++ ++program test ++ use elpa ++ ++ !use test_util ++ use test_setup_mpi ++ use test_prepare_matrix ++ use test_read_input_parameters ++ use test_blacs_infrastructure ++ use test_check_correctness ++ use precision_for_tests ++ use iso_fortran_env ++ ++#ifdef HAVE_REDIRECT ++ use test_redirect ++#endif ++ implicit none ++ ++ ! matrix dimensions ++ TEST_INT_TYPE :: na, nev, nblk ++ ++ ! mpi ++ TEST_INT_TYPE :: myid, nprocs ++ TEST_INT_TYPE :: na_cols, na_rows ! local matrix size ++ TEST_INT_TYPE :: np_cols, np_rows ! number of MPI processes per column/row ++ TEST_INT_TYPE :: my_prow, my_pcol ! local MPI task position (my_prow, my_pcol) in the grid (0..np_cols -1, 0..np_rows -1) ++ TEST_INT_MPI_TYPE :: mpierr ++ ++ ! blacs ++ character(len=1) :: layout ++ TEST_INT_TYPE :: my_blacs_ctxt, sc_desc(9), info, nprow, npcol ++ ++ ! The Matrix ++ MATRIX_TYPE, allocatable :: a_skewsymmetric(:,:), as_skewsymmetric(:,:) ++ MATRIX_TYPE_COMPLEX, allocatable :: a_complex(:,:), as_complex(:,:) ++ ! eigenvectors ++ MATRIX_TYPE, allocatable :: z_skewsymmetric(:,:) ++ MATRIX_TYPE_COMPLEX, allocatable :: z_complex(:,:) ++ ! eigenvalues ++ EV_TYPE, allocatable :: ev_skewsymmetric(:), ev_complex(:) ++ ++ TEST_INT_TYPE :: status, i, j ++ integer(kind=c_int) :: error_elpa ++ ++ type(output_t) :: write_to_file ++ class(elpa_t), pointer :: e_complex, e_skewsymmetric ++ ++ call read_input_parameters(na, nev, nblk, write_to_file) ++! 
call setup_mpi(myid, nprocs) ++ call MPI_INIT_THREAD(MPI_THREAD_SERIALIZED,info, mpierr) ++ call MPI_COMM_SIZE(MPI_COMM_WORLD,nprocs, mpierr) ++ call MPI_COMM_RANK(MPI_COMM_WORLD,myid, mpierr) ++#ifdef HAVE_REDIRECT ++#ifdef WITH_MPI ++ call MPI_BARRIER(MPI_COMM_WORLD, mpierr) ++ call redirect_stdout(myid) ++#endif ++#endif ++ ++ if (elpa_init(CURRENT_API_VERSION) /= ELPA_OK) then ++ print *, "ELPA API version not supported" ++ stop 1 ++ endif ++! ++ layout = 'C' ++ do np_cols = NINT(SQRT(REAL(nprocs))),2,-1 ++ if(mod(nprocs,np_cols) == 0 ) exit ++ enddo ++ np_rows = nprocs/np_cols ++ assert(nprocs == np_rows * np_cols) ++ ++ if (myid == 0) then ++ print '((a,i0))', 'Matrix size: ', na ++ print '((a,i0))', 'Num eigenvectors: ', nev ++ print '((a,i0))', 'Blocksize: ', nblk ++#ifdef WITH_MPI ++ print '((a,i0))', 'Num MPI proc: ', nprocs ++ print '(3(a,i0))','Number of processor rows=',np_rows,', cols=',np_cols,', total=',nprocs ++ print '(a)', 'Process layout: ' // layout ++#endif ++ print *,'' ++ endif ++ ++ call set_up_blacsgrid(int(mpi_comm_world,kind=BLAS_KIND), np_rows, & ++ np_cols, layout, & ++ my_blacs_ctxt, my_prow, my_pcol) ++ ++ call set_up_blacs_descriptor(na, nblk, my_prow, my_pcol, np_rows, np_cols, & ++ na_rows, na_cols, sc_desc, my_blacs_ctxt, info) ++ ++ allocate(a_skewsymmetric (na_rows,na_cols)) ++ allocate(as_skewsymmetric(na_rows,na_cols)) ++ allocate(z_skewsymmetric (na_rows,2*na_cols)) ++ allocate(ev_skewsymmetric(na)) ++ ++ a_skewsymmetric(:,:) = 0.0 ++ z_skewsymmetric(:,:) = 0.0 ++ ev_skewsymmetric(:) = 0.0 ++ ++ call prepare_matrix_random(na, myid, sc_desc, a_skewsymmetric, & ++ z_skewsymmetric(:,1:na_cols), as_skewsymmetric, is_skewsymmetric=1) ++ ++ !call MPI_BARRIER(MPI_COMM_WORLD, mpierr) ++ as_skewsymmetric(:,:) = a_skewsymmetric(:,:) ++ ++ ++ ! prepare the complex matrix for the "brute force" case ++ allocate(a_complex (na_rows,na_cols)) ++ allocate(as_complex(na_rows,na_cols)) ++ allocate(z_complex (na_rows,na_cols)) ++ allocate(ev_complex(na)) ++ ++ a_complex(1:na_rows,1:na_cols) = 0.0 ++ z_complex(1:na_rows,1:na_cols) = 0.0 ++ as_complex(1:na_rows,1:na_cols) = 0.0 ++ ++ ++ do j=1, na_cols ++ do i=1,na_rows ++#ifdef TEST_DOUBLE ++ a_complex(i,j) = dcmplx(0.0, a_skewsymmetric(i,j)) ++#endif ++#ifdef TEST_SINGLE ++ a_complex(i,j) = cmplx(0.0, a_skewsymmetric(i,j)) ++#endif ++ enddo ++ enddo ++ ++ ++ ++ z_complex(1:na_rows,1:na_cols) = a_complex(1:na_rows,1:na_cols) ++ as_complex(1:na_rows,1:na_cols) = a_complex(1:na_rows,1:na_cols) ++ ++ ! first set up and solve the brute force problem ++ e_complex => elpa_allocate(error_elpa) ++ call set_basic_params(e_complex, na, nev, na_rows, na_cols, my_prow, my_pcol) ++ ++ call e_complex%set("timings",1, error_elpa) ++ ++ call e_complex%set("debug",1,error_elpa) ++ ++#if TEST_NVIDIA_GPU == 1 || (TEST_NVIDIA_GPU == 0) && (TEST_AMD_GPU == 0) && (TEST_INTEL_GPU == 0) ++ call e_complex%set("nvidia-gpu", TEST_GPU,error_elpa) ++#endif ++#if TEST_AMD_GPU == 1 ++ call e_complex%set("amd-gpu", TEST_GPU,error_elpa) ++#endif ++#if TEST_INTEL_GPU == 1 ++ call e_complex%set("intel-gpu", TEST_GPU,error_elpa) ++#endif ++ ++ call e_complex%set("omp_threads", 8, error_elpa) ++ ++ assert_elpa_ok(e_complex%setup()) ++ call e_complex%set("solver", elpa_solver_2stage, error_elpa) ++ ++ call e_complex%timer_start("eigenvectors: brute force as complex matrix") ++ call e_complex%eigenvectors(a_complex, ev_complex, z_complex, error_elpa) ++ call e_complex%timer_stop("eigenvectors: brute force as complex matrix") ++ ++ if (myid .eq. 
0) then
++ print *, ""
++ call e_complex%print_times("eigenvectors: brute force as complex matrix")
++ endif
++#ifdef WITH_MPI
++ call MPI_BARRIER(MPI_COMM_WORLD, mpierr)
++#endif
++! as_complex(:,:) = z_complex(:,:)
++#ifdef TEST_SINGLE
++ status = check_correctness_evp_numeric_residuals_complex_single(na, nev, as_complex, z_complex, ev_complex, sc_desc, &
++ nblk, myid, np_rows,np_cols, my_prow, my_pcol)
++#else
++ status = check_correctness_evp_numeric_residuals_complex_double(na, nev, as_complex, z_complex, ev_complex, sc_desc, &
++ nblk, myid, np_rows,np_cols, my_prow, my_pcol)
++#endif
++ status = 0 ! result of the complex residual check is discarded for now (see the to-do below)
++ call check_status(status, myid)
++
++#ifdef WITH_MPI
++ call MPI_BARRIER(MPI_COMM_WORLD, mpierr)
++#endif
++ ! now run the skewsymmetric case
++ e_skewsymmetric => elpa_allocate(error_elpa)
++ call set_basic_params(e_skewsymmetric, na, nev, na_rows, na_cols, my_prow, my_pcol)
++
++ call e_skewsymmetric%set("timings",1, error_elpa)
++
++ call e_skewsymmetric%set("debug",1,error_elpa)
++
++#if TEST_NVIDIA_GPU == 1 || (TEST_NVIDIA_GPU == 0) && (TEST_AMD_GPU == 0) && (TEST_INTEL_GPU == 0)
++ call e_skewsymmetric%set("nvidia-gpu", TEST_GPU,error_elpa)
++#endif
++#if TEST_AMD_GPU == 1
++ call e_skewsymmetric%set("amd-gpu", TEST_GPU,error_elpa)
++#endif
++#if TEST_INTEL_GPU == 1
++ call e_skewsymmetric%set("intel-gpu", TEST_GPU,error_elpa)
++#endif
++ call e_skewsymmetric%set("omp_threads",8, error_elpa)
++
++ assert_elpa_ok(e_skewsymmetric%setup())
++
++ call e_skewsymmetric%set("solver", elpa_solver_2stage, error_elpa)
++
++ call e_skewsymmetric%timer_start("eigenvectors: skewsymmetric ")
++ call e_skewsymmetric%skew_eigenvectors(a_skewsymmetric, ev_skewsymmetric, z_skewsymmetric, error_elpa)
++ call e_skewsymmetric%timer_stop("eigenvectors: skewsymmetric ")
++
++ if (myid .eq. 0) then
++ print *, ""
++ call e_skewsymmetric%print_times("eigenvectors: skewsymmetric")
++ endif
++
++ ! check eigenvalues
++ do i=1, na
++ if (myid == 0) then
++#ifdef TEST_DOUBLE
++ if (abs(ev_complex(i)-ev_skewsymmetric(i))/abs(ev_complex(i)) .gt. 1e-10) then
++#endif
++#ifdef TEST_SINGLE
++ if (abs(ev_complex(i)-ev_skewsymmetric(i))/abs(ev_complex(i)) .gt. 1e-4) then
++#endif
++ print *,"ev: i=",i,ev_complex(i),ev_skewsymmetric(i)
++ status = 1
++ endif
++ endif
++ enddo
++
++
++! call check_status(status, myid)
++
++ z_complex(:,:) = 0
++ do j=1, na_cols
++ do i=1,na_rows
++#ifdef TEST_DOUBLE
++ z_complex(i,j) = dcmplx(z_skewsymmetric(i,j), z_skewsymmetric(i,na_cols+j))
++#endif
++#ifdef TEST_SINGLE
++ z_complex(i,j) = cmplx(z_skewsymmetric(i,j), z_skewsymmetric(i,na_cols+j))
++#endif
++ enddo
++ enddo
++#ifdef WITH_MPI
++ call MPI_BARRIER(MPI_COMM_WORLD, mpierr)
++#endif
++
++#ifdef TEST_SINGLE
++ status = check_correctness_evp_numeric_residuals_ss_real_single(na, nev, as_skewsymmetric, z_complex, ev_skewsymmetric, &
++ sc_desc, nblk, myid, np_rows,np_cols, my_prow, my_pcol)
++#else
++ status = check_correctness_evp_numeric_residuals_ss_real_double(na, nev, as_skewsymmetric, z_complex, ev_skewsymmetric, &
++ sc_desc, nblk, myid, np_rows,np_cols, my_prow, my_pcol)
++#endif
++
++#ifdef WITH_MPI
++ call MPI_BARRIER(MPI_COMM_WORLD, mpierr)
++#endif
++ call elpa_deallocate(e_complex,error_elpa)
++ call elpa_deallocate(e_skewsymmetric,error_elpa)
++
++
++ ! to do
++ ! - check whether brute-force check_correctness_evp_numeric_residuals works (complex ev)
++ !
- invent a test for skewsymmetric residuals ++ ++ deallocate(a_complex) ++ deallocate(as_complex) ++ deallocate(z_complex) ++ deallocate(ev_complex) ++ ++ deallocate(a_skewsymmetric) ++ deallocate(as_skewsymmetric) ++ deallocate(z_skewsymmetric) ++ deallocate(ev_skewsymmetric) ++ call elpa_uninit(error_elpa) ++ ++ ++ ++#ifdef WITH_MPI ++ call blacs_gridexit(my_blacs_ctxt) ++ call mpi_finalize(mpierr) ++#endif ++ ++ call exit(status) ++ ++contains ++ subroutine set_basic_params(elpa, na, nev, na_rows, na_cols, my_prow, my_pcol) ++ implicit none ++ class(elpa_t), pointer :: elpa ++ TEST_INT_TYPE, intent(in) :: na, nev, na_rows, na_cols, my_prow, my_pcol ++ ++ call elpa%set("na", int(na,kind=c_int), error_elpa) ++ assert_elpa_ok(error_elpa) ++ call elpa%set("nev", int(nev,kind=c_int), error_elpa) ++ assert_elpa_ok(error_elpa) ++ call elpa%set("local_nrows", int(na_rows,kind=c_int), error_elpa) ++ assert_elpa_ok(error_elpa) ++ call elpa%set("local_ncols", int(na_cols,kind=c_int), error_elpa) ++ assert_elpa_ok(error_elpa) ++ call elpa%set("nblk", int(nblk,kind=c_int), error_elpa) ++ assert_elpa_ok(error_elpa) ++ ++#ifdef WITH_MPI ++ call elpa%set("mpi_comm_parent", int(MPI_COMM_WORLD,kind=c_int), error_elpa) ++ assert_elpa_ok(error_elpa) ++ call elpa%set("process_row", int(my_prow,kind=c_int), error_elpa) ++ assert_elpa_ok(error_elpa) ++ call elpa%set("process_col", int(my_pcol,kind=c_int), error_elpa) ++ assert_elpa_ok(error_elpa) ++#endif ++ end subroutine ++ subroutine check_status(status, myid) ++ implicit none ++ TEST_INT_TYPE, intent(in) :: status, myid ++ TEST_INT_MPI_TYPE :: mpierr ++ if (status /= 0) then ++ if (myid == 0) print *, "Result incorrect!" ++#ifdef WITH_MPI ++ call mpi_finalize(mpierr) ++#endif ++ call exit(status) ++ endif ++ end subroutine ++end program +diff -ruN elpa-new_release_2021.11.001/examples/Fortran/test_split_comm.F90 elpa-new_release_2021.11.001_ok/examples/Fortran/test_split_comm.F90 +--- elpa-new_release_2021.11.001/examples/Fortran/test_split_comm.F90 1970-01-01 01:00:00.000000000 +0100 ++++ elpa-new_release_2021.11.001_ok/examples/Fortran/test_split_comm.F90 2022-02-01 17:13:58.420500580 +0100 +@@ -0,0 +1,346 @@ ++! This file is part of ELPA. ++! ++! The ELPA library was originally created by the ELPA consortium, ++! consisting of the following organizations: ++! ++! - Max Planck Computing and Data Facility (MPCDF), formerly known as ++! Rechenzentrum Garching der Max-Planck-Gesellschaft (RZG), ++! - Bergische Universität Wuppertal, Lehrstuhl für angewandte ++! Informatik, ++! - Technische Universität München, Lehrstuhl für Informatik mit ++! Schwerpunkt Wissenschaftliches Rechnen , ++! - Fritz-Haber-Institut, Berlin, Abt. Theorie, ++! - Max-Plack-Institut für Mathematik in den Naturwissenschaften, ++! Leipzig, Abt. Komplexe Strukutren in Biologie und Kognition, ++! and ++! - IBM Deutschland GmbH ++! ++! ++! More information can be found here: ++! http://elpa.mpcdf.mpg.de/ ++! ++! ELPA is free software: you can redistribute it and/or modify ++! it under the terms of the version 3 of the license of the ++! GNU Lesser General Public License as published by the Free ++! Software Foundation. ++! ++! ELPA is distributed in the hope that it will be useful, ++! but WITHOUT ANY WARRANTY; without even the implied warranty of ++! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++! GNU Lesser General Public License for more details. ++! ++! You should have received a copy of the GNU Lesser General Public License ++! along with ELPA. 
If not, see <http://www.gnu.org/licenses/> ++! ++! ELPA reflects a substantial effort on the part of the original ++! ELPA consortium, and we ask you to respect the spirit of the ++! license that we chose: i.e., please contribute any changes you ++! may have back to the original ELPA library distribution, and keep ++! any derivatives of ELPA under the same license that we chose for ++! the original distribution, the GNU Lesser General Public License. ++! ++! ++#include "config-f90.h" ++ ++! Define one of TEST_REAL or TEST_COMPLEX ++! Define one of TEST_SINGLE or TEST_DOUBLE ++! Define one of TEST_SOLVER_1STAGE or TEST_SOLVER_2STAGE ++! Define TEST_NVIDIA_GPU \in [0, 1] ++! Define TEST_INTEL_GPU \in [0, 1] ++! Define either TEST_ALL_KERNELS or a TEST_KERNEL \in [any valid kernel] ++ ++#if !(defined(TEST_REAL) ^ defined(TEST_COMPLEX)) ++error: define exactly one of TEST_REAL or TEST_COMPLEX ++#endif ++ ++#if !(defined(TEST_SINGLE) ^ defined(TEST_DOUBLE)) ++error: define exactly one of TEST_SINGLE or TEST_DOUBLE ++#endif ++ ++#ifdef TEST_SINGLE ++# define EV_TYPE real(kind=C_FLOAT) ++# ifdef TEST_REAL ++# define MATRIX_TYPE real(kind=C_FLOAT) ++# else ++# define MATRIX_TYPE complex(kind=C_FLOAT_COMPLEX) ++# endif ++#else ++# define EV_TYPE real(kind=C_DOUBLE) ++# ifdef TEST_REAL ++# define MATRIX_TYPE real(kind=C_DOUBLE) ++# else ++# define MATRIX_TYPE complex(kind=C_DOUBLE_COMPLEX) ++# endif ++#endif ++ ++ ++#ifdef TEST_REAL ++# define AUTOTUNE_DOMAIN ELPA_AUTOTUNE_DOMAIN_REAL ++#else ++# define AUTOTUNE_DOMAIN ELPA_AUTOTUNE_DOMAIN_COMPLEX ++#endif ++ ++ ++#ifdef HAVE_64BIT_INTEGER_MATH_SUPPORT ++#define TEST_INT_TYPE integer(kind=c_int64_t) ++#define INT_TYPE c_int64_t ++#else ++#define TEST_INT_TYPE integer(kind=c_int32_t) ++#define INT_TYPE c_int32_t ++#endif ++ ++#ifdef HAVE_64BIT_INTEGER_MPI_SUPPORT ++#define TEST_INT_MPI_TYPE integer(kind=c_int64_t) ++#define INT_MPI_TYPE c_int64_t ++#else ++#define TEST_INT_MPI_TYPE integer(kind=c_int32_t) ++#define INT_MPI_TYPE c_int32_t ++#endif ++#include "assert.h" ++ ++program test ++ use elpa ++ ++ !use test_util ++ use test_setup_mpi ++ use test_prepare_matrix ++ use test_read_input_parameters ++ use test_blacs_infrastructure ++ use test_check_correctness ++ use test_analytic ++ use iso_fortran_env ++ ++#ifdef HAVE_REDIRECT ++ use test_redirect ++#endif ++ implicit none ++ ++ ! matrix dimensions ++ TEST_INT_TYPE :: na, nev, nblk ++ TEST_INT_TYPE :: num_groups, group_size, color, key ++ ++ ! mpi ++ TEST_INT_TYPE :: myid, nprocs ++ TEST_INT_TYPE :: na_cols, na_rows ! local matrix size ++ TEST_INT_TYPE :: np_cols, np_rows ! number of MPI processes per column/row ++ TEST_INT_TYPE :: my_prow, my_pcol ! local MPI task position (my_prow, my_pcol) in the grid (0..np_cols -1, 0..np_rows -1) ++ TEST_INT_MPI_TYPE :: mpierr, ierr,mpi_sub_commMPI, myidMPI, nprocsMPI, colorMPI, keyMPI, & ++ myid_subMPI, nprocs_subMPI ++ TEST_INT_TYPE :: mpi_sub_comm ++ TEST_INT_TYPE :: myid_sub, nprocs_sub ++ ++ ! blacs ++ character(len=1) :: layout ++ TEST_INT_TYPE :: my_blacs_ctxt, sc_desc(9), info, nprow, npcol ++ ++ ! The Matrix ++ MATRIX_TYPE, allocatable :: a(:,:), as(:,:) ++ ! eigenvectors ++ MATRIX_TYPE, allocatable :: z(:,:) ++ ! 
eigenvalues
++ EV_TYPE, allocatable :: ev(:)
++
++ TEST_INT_TYPE :: status
++ integer(kind=c_int) :: error_elpa
++
++ type(output_t) :: write_to_file
++ class(elpa_t), pointer :: e
++
++ TEST_INT_TYPE :: iter
++ character(len=5) :: iter_string
++
++ status = 0
++#ifdef WITH_MPI
++
++ call read_input_parameters(na, nev, nblk, write_to_file)
++ !call setup_mpi(myid, nprocs)
++ call mpi_init_thread(MPI_THREAD_SERIALIZED, info, mpierr)
++ call mpi_comm_rank(mpi_comm_world, myidMPI,mpierr)
++ call mpi_comm_size(mpi_comm_world, nprocsMPI,mpierr)
++ myid = int(myidMPI,kind=BLAS_KIND)
++ nprocs = int(nprocsMPI,kind=BLAS_KIND)
++
++ if((mod(nprocs, 4) == 0) .and. (nprocs > 4)) then
++ num_groups = 4
++ else if(mod(nprocs, 3) == 0) then
++ num_groups = 3
++ else if(mod(nprocs, 2) == 0) then
++ num_groups = 2
++ else
++ num_groups = 1
++ endif
++
++ group_size = nprocs / num_groups
++
++ if(num_groups * group_size .ne. nprocs) then
++ print *, "Something went wrong before splitting the communicator"
++ stop 1
++ else
++ if(myid == 0) then
++ print '((a,i0,a,i0))', "The test will split the global communicator into ", num_groups, " groups of size ", group_size
++ endif
++ endif
++
++ ! each group of processors will have the same color
++ color = mod(myid, num_groups)
++ ! this will determine the myid in each group
++ key = myid/num_groups
++ ! split the communicator
++ colorMPI=int(color,kind=MPI_KIND)
++ keyMPI = int(key, kind=MPI_KIND)
++ call mpi_comm_split(mpi_comm_world, colorMPI, keyMPI, mpi_sub_commMPI, mpierr)
++ mpi_sub_comm = int(mpi_sub_commMPI,kind=BLAS_KIND)
++ color = int(colorMPI,kind=BLAS_KIND)
++ key = int(keyMPI,kind=BLAS_KIND)
++ if(mpierr .ne. MPI_SUCCESS) then
++ print *, "communicator splitting not successful", mpierr
++ stop 1
++ endif
++
++ call mpi_comm_rank(mpi_sub_commMPI, myid_subMPI, mpierr)
++ call mpi_comm_size(mpi_sub_commMPI, nprocs_subMPI, mpierr)
++ myid_sub = int(myid_subMPI,kind=BLAS_KIND)
++ nprocs_sub = int(nprocs_subMPI,kind=BLAS_KIND)
++
++ !print *, "glob ", myid, nprocs, ", loc ", myid_sub, nprocs_sub, ", color ", color, ", key ", key
++
++ if((mpierr .ne. MPI_SUCCESS) .or. (nprocs_sub .ne. group_size) .or. (myid_sub >= group_size)) then
++ print *, "something is wrong with the sub communicators"
++ stop 1
++ endif
++
++
++#ifdef HAVE_REDIRECT
++ call MPI_BARRIER(MPI_COMM_WORLD, mpierr)
++ call redirect_stdout(myid)
++#endif
++
++ if (elpa_init(CURRENT_API_VERSION) /= ELPA_OK) then
++ print *, "ELPA API version not supported"
++ stop 1
++ endif
++
++ layout = 'C'
++ do np_cols = NINT(SQRT(REAL(nprocs_sub))),2,-1
++ if(mod(nprocs_sub,np_cols) == 0 ) exit
++ enddo
++ np_rows = nprocs_sub/np_cols
++ assert(nprocs_sub == np_rows * np_cols)
++ assert(nprocs == np_rows * np_cols * num_groups)
++
++ if (myid == 0) then
++ print '((a,i0))', 'Matrix size: ', na
++ print '((a,i0))', 'Num eigenvectors: ', nev
++ print '((a,i0))', 'Blocksize: ', nblk
++ print '(a)', 'Process layout: ' // layout
++ print *,''
++ endif
++ if (myid_sub == 0) then
++ print '(4(a,i0))','GROUP ', color, ': Number of processor rows=',np_rows,', cols=',np_cols,', total=',nprocs_sub
++ endif
++
++ !
USING the subcommunicator ++ call set_up_blacsgrid(int(mpi_sub_comm,kind=BLAS_KIND), np_rows, np_cols, layout, & ++ my_blacs_ctxt, my_prow, my_pcol) ++ ++ call set_up_blacs_descriptor(na, nblk, my_prow, my_pcol, np_rows, np_cols, & ++ na_rows, na_cols, sc_desc, my_blacs_ctxt, info) ++ ++ allocate(a (na_rows,na_cols)) ++ allocate(as(na_rows,na_cols)) ++ allocate(z (na_rows,na_cols)) ++ allocate(ev(na)) ++ ++ a(:,:) = 0.0 ++ z(:,:) = 0.0 ++ ev(:) = 0.0 ++ ++ !call prepare_matrix_analytic(na, a, nblk, myid_sub, np_rows, np_cols, my_prow, my_pcol, print_times=.false.) ++ call prepare_matrix_random(na, myid_sub, sc_desc, a, z, as) ++ as(:,:) = a(:,:) ++ ++ e => elpa_allocate(error_elpa) ++ call set_basic_params(e, na, nev, na_rows, na_cols, mpi_sub_comm, my_prow, my_pcol) ++ ++ call e%set("timings",1, error_elpa) ++ ++ call e%set("debug",1, error_elpa) ++#if TEST_NVIDIA_GPU == 1 || (TEST_NVIDIA_GPU == 0) && (TEST_AMD_GPU == 0) && (TEST_INTEL_GPU == 0) ++ call e%set("nvidia-gpu", 0, error_elpa) ++#endif ++#if TEST_INTEL_GPU == 1 ++ call e%set("intel-gpu", 0, error_elpa) ++#endif ++ !call e%set("max_stored_rows", 15, error_elpa) ++ ++ assert_elpa_ok(e%setup()) ++ ++ ++ ++! if(myid == 0) print *, "parameters of e" ++! call e%print_all_parameters() ++! if(myid == 0) print *, "" ++ ++ ++ call e%timer_start("eigenvectors") ++ call e%eigenvectors(a, ev, z, error_elpa) ++ call e%timer_stop("eigenvectors") ++ ++ assert_elpa_ok(error_elpa) ++ ++ !status = check_correctness_analytic(na, nev, ev, z, nblk, myid_sub, np_rows, np_cols, my_prow, my_pcol, & ++ ! .true., .true., print_times=.false.) ++ status = check_correctness_evp_numeric_residuals(na, nev, as, z, ev, sc_desc, nblk, myid_sub, & ++ np_rows,np_cols, my_prow, my_pcol) ++ if (status /= 0) & ++ print *, "processor ", myid, ": Result incorrect for processor group ", color ++ ++ if (myid .eq. 
0) then
++ print *, "Showing times of one group only"
++ call e%print_times("eigenvectors")
++ endif
++
++ call elpa_deallocate(e, error_elpa)
++
++ deallocate(a)
++ deallocate(as)
++ deallocate(z)
++ deallocate(ev)
++
++ call elpa_uninit(error_elpa)
++
++ call blacs_gridexit(my_blacs_ctxt)
++ call mpi_finalize(mpierr)
++
++#endif
++ call exit(status)
++
++contains
++ subroutine set_basic_params(elpa, na, nev, na_rows, na_cols, communicator, my_prow, my_pcol)
++ use iso_c_binding
++ implicit none
++ class(elpa_t), pointer :: elpa
++ TEST_INT_TYPE, intent(in) :: na, nev, na_rows, na_cols, my_prow, my_pcol, communicator
++
++#ifdef WITH_MPI
++ call elpa%set("na", int(na,kind=c_int), error_elpa)
++ assert_elpa_ok(error_elpa)
++ call elpa%set("nev", int(nev,kind=c_int), error_elpa)
++ assert_elpa_ok(error_elpa)
++ call elpa%set("local_nrows", int(na_rows,kind=c_int), error_elpa)
++ assert_elpa_ok(error_elpa)
++ call elpa%set("local_ncols", int(na_cols,kind=c_int), error_elpa)
++ assert_elpa_ok(error_elpa)
++ call elpa%set("nblk", int(nblk,kind=c_int), error_elpa)
++ assert_elpa_ok(error_elpa)
++
++ call elpa%set("mpi_comm_parent", int(communicator,kind=c_int), error_elpa)
++ assert_elpa_ok(error_elpa)
++ call elpa%set("process_row", int(my_prow,kind=c_int), error_elpa)
++ assert_elpa_ok(error_elpa)
++ call elpa%set("process_col", int(my_pcol,kind=c_int), error_elpa)
++ assert_elpa_ok(error_elpa)
++#endif
++ end subroutine
++
++end program
+diff -ruN elpa-new_release_2021.11.001/examples/Makefile_hybrid elpa-new_release_2021.11.001_ok/examples/Makefile_hybrid
+--- elpa-new_release_2021.11.001/examples/Makefile_hybrid 1970-01-01 01:00:00.000000000 +0100
++++ elpa-new_release_2021.11.001_ok/examples/Makefile_hybrid 2022-01-28 09:56:18.548921000 +0100
+@@ -0,0 +1,24 @@
++# MPICH, that is IntelMPI or ParaStationMPI
++SCALAPACK_LIB = -lmkl_scalapack_lp64 -lmkl_blacs_intelmpi_lp64
++# OpenMPI
++# SCALAPACK_LIB = -lmkl_scalapack_lp64 $(MKLROOT)/lib/intel64/libmkl_blacs_openmpi_lp64.a
++LAPACK_LIB =
++# Intel compiler
++MKL = -lmkl_intel_lp64 -lmkl_intel_thread -lmkl_core -liomp5 -lpthread -lstdc++
++# GCC
++# MKL = -lmkl_gf_lp64 -lmkl_gnu_thread -lmkl_core -lgomp -lpthread -lstdc++ -lm
++F90 = mpif90 -O3 -qopenmp -I$(ELPA_MODULES_OPENMP) -I$(ELPA_INCLUDE_OPENMP) -I$(ELPA_INCLUDE_OPENMP)/elpa
++# GCC
++# F90 = mpif90 -O3 -fopenmp -I$(ELPA_MODULES_OPENMP) -I$(ELPA_INCLUDE_OPENMP) -I$(ELPA_INCLUDE_OPENMP)/elpa
++LIBS = -L$(ELPA_LIB_OPENMP) -lelpa_openmp -lelpatest_openmp -lelpa $(SCALAPACK_LIB) $(MKL)
++CC = mpicc -O3 -qopenmp
++# GCC
++# CC = mpicc -O3 -fopenmp
++
++all: test_real_e1_omp test_real_e2_omp
++
++test_real_e1_omp: test_real_e1.F90
++ $(F90) -DWITH_OPENMP_TRADITIONAL -o $@ test_real_e1.F90 $(LIBS)
++
++test_real_e2_omp: test_real_e2.F90
++ $(F90) -DWITH_OPENMP_TRADITIONAL -o $@ test_real_e2.F90 $(LIBS)
+diff -ruN elpa-new_release_2021.11.001/examples/Makefile_pure elpa-new_release_2021.11.001_ok/examples/Makefile_pure
+--- elpa-new_release_2021.11.001/examples/Makefile_pure 1970-01-01 01:00:00.000000000 +0100
++++ elpa-new_release_2021.11.001_ok/examples/Makefile_pure 2022-01-26 09:43:16.599164000 +0100
+@@ -0,0 +1,20 @@
++# MPICH, that is IntelMPI or ParaStationMPI
++SCALAPACK_LIB = -lmkl_scalapack_lp64 -lmkl_blacs_intelmpi_lp64
++# OpenMPI
++# SCALAPACK_LIB = -lmkl_scalapack_lp64 $(MKLROOT)/lib/intel64/libmkl_blacs_openmpi_lp64.a
++LAPACK_LIB =
++# Intel compiler
++MKL = -lmkl_intel_lp64 -lmkl_sequential -lmkl_core -liomp5 -lpthread -lstdc++
++# GCC
++# MKL = -lmkl_gf_lp64 -lmkl_sequential
-lmkl_core -lgomp -lpthread -lstdc++ -lm ++F90 = mpif90 -O3 -I$(ELPA_MODULES) -I$(ELPA_INCLUDE) -I$(ELPA_INCLUDE)/elpa ++LIBS = -L$(ELPA_LIB) -lelpa -lelpatest $(SCALAPACK_LIB) $(MKL) ++CC = mpicc -O3 ++ ++all: test_real_e1 test_real_e2 ++ ++test_real_e1: test_real_e1.F90 ++ $(F90) -o $@ test_real_e1.F90 $(LIBS) ++ ++test_real_e2: test_real_e2.F90 ++ $(F90) -o $@ test_real_e2.F90 $(LIBS) +diff -ruN elpa-new_release_2021.11.001/examples/Makefile_pure_cuda elpa-new_release_2021.11.001_ok/examples/Makefile_pure_cuda +--- elpa-new_release_2021.11.001/examples/Makefile_pure_cuda 1970-01-01 01:00:00.000000000 +0100 ++++ elpa-new_release_2021.11.001_ok/examples/Makefile_pure_cuda 2022-01-26 09:43:16.600617000 +0100 +@@ -0,0 +1,20 @@ ++# MPICH, that is IntelMPI or ParaStationMPI ++SCALAPACK_LIB = -lmkl_scalapack_lp64 -lmkl_blacs_intelmpi_lp64 ++# OpenMPI ++# SCALAPACK_LIB = -lmkl_scalapack_lp64 $(MKLROOT)/lib/intel64/libmkl_blacs_openmpi_lp64.a ++LAPACK_LIB = ++# Intel compiler ++MKL = -lmkl_intel_lp64 -lmkl_sequential -lmkl_core -liomp5 -lpthread -lstdc++ ++# GCC ++# MKL = -lmkl_gf_lp64 -lmkl_sequential -lmkl_core -lgomp -lpthread -lstdc++ -lm ++F90 = mpif90 -O3 -I$(ELPA_MODULES) -I$(ELPA_INCLUDE) -I$(ELPA_INCLUDE)/elpa ++LIBS = -L$(ELPA_LIB) -lelpa -lelpatest $(SCALAPACK_LIB) $(MKL) -lcudart ++CC = mpicc -O3 ++ ++all: test_real_e1 test_real_e2 ++ ++test_real_e1: test_real_e1.F90 ++ $(F90) -DCUDA -o $@ test_real_e1.F90 $(LIBS) ++ ++test_real_e2: test_real_e2.F90 ++ $(F90) -DCUDA -DCUDAKERNEL -o $@ test_real_e2.F90 $(LIBS) +diff -ruN elpa-new_release_2021.11.001/examples/shared/GPU/CUDA/test_cuda.F90 elpa-new_release_2021.11.001_ok/examples/shared/GPU/CUDA/test_cuda.F90 +--- elpa-new_release_2021.11.001/examples/shared/GPU/CUDA/test_cuda.F90 1970-01-01 01:00:00.000000000 +0100 ++++ elpa-new_release_2021.11.001_ok/examples/shared/GPU/CUDA/test_cuda.F90 2022-01-26 10:10:58.319812000 +0100 +@@ -0,0 +1,455 @@ ++! Copyright 2014, A. Marek ++! ++! This file is part of ELPA. ++! ++! The ELPA library was originally created by the ELPA consortium, ++! consisting of the following organizations: ++! ++! - Rechenzentrum Garching der Max-Planck-Gesellschaft (RZG), ++! - Bergische Universität Wuppertal, Lehrstuhl für angewandte ++! Informatik, ++! - Technische Universität München, Lehrstuhl für Informatik mit ++! Schwerpunkt Wissenschaftliches Rechnen , ++! - Fritz-Haber-Institut, Berlin, Abt. Theorie, ++! - Max-Plack-Institut für Mathematik in den Naturwissenschaften, ++! Leipzig, Abt. Komplexe Strukutren in Biologie und Kognition, ++! and ++! - IBM Deutschland GmbH ++! ++! ++! More information can be found here: ++! http://elpa.mpcdf.mpg.de/ ++! ++! ELPA is free software: you can redistribute it and/or modify ++! it under the terms of the version 3 of the license of the ++! GNU Lesser General Public License as published by the Free ++! Software Foundation. ++! ++! ELPA is distributed in the hope that it will be useful, ++! but WITHOUT ANY WARRANTY; without even the implied warranty of ++! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++! GNU Lesser General Public License for more details. ++! ++! You should have received a copy of the GNU Lesser General Public License ++! along with ELPA. If not, see <http://www.gnu.org/licenses/> ++! ++! ELPA reflects a substantial effort on the part of the original ++! ELPA consortium, and we ask you to respect the spirit of the ++! license that we chose: i.e., please contribute any changes you ++! 
may have back to the original ELPA library distribution, and keep ++! any derivatives of ELPA under the same license that we chose for ++! the original distribution, the GNU Lesser General Public License. ++! ++! This file was written by A. Marek, MPCDF ++ ++ ++#include "config-f90.h" ++module test_cuda_functions ++ use, intrinsic :: iso_c_binding ++ use precision_for_tests ++ implicit none ++ ++ public ++ ++ integer(kind=ik) :: cudaMemcpyHostToDevice ++ integer(kind=ik) :: cudaMemcpyDeviceToHost ++ integer(kind=ik) :: cudaMemcpyDeviceToDevice ++ integer(kind=ik) :: cudaHostRegisterDefault ++ integer(kind=ik) :: cudaHostRegisterPortable ++ integer(kind=ik) :: cudaHostRegisterMapped ++ ++ ! TODO global variable, has to be changed ++ integer(kind=C_intptr_T) :: cublasHandle = -1 ++ ++ ! functions to set and query the CUDA devices ++ interface ++ function cuda_setdevice_c(n) result(istat) & ++ bind(C, name="cudaSetDeviceFromC") ++ ++ use, intrinsic :: iso_c_binding ++ implicit none ++ integer(kind=C_INT), value :: n ++ integer(kind=C_INT) :: istat ++ end function cuda_setdevice_c ++ end interface ++ ++ ! functions to copy CUDA memory ++ interface ++ function cuda_memcpyDeviceToDevice_c() result(flag) & ++ bind(C, name="cudaMemcpyDeviceToDeviceFromC") ++ use, intrinsic :: iso_c_binding ++ implicit none ++ integer(kind=c_int) :: flag ++ end function ++ end interface ++ ++ interface ++ function cuda_memcpyHostToDevice_c() result(flag) & ++ bind(C, name="cudaMemcpyHostToDeviceFromC") ++ use, intrinsic :: iso_c_binding ++ implicit none ++ integer(kind=c_int) :: flag ++ end function ++ end interface ++ ++ interface ++ function cuda_memcpyDeviceToHost_c() result(flag) & ++ bind(C, name="cudaMemcpyDeviceToHostFromC") ++ use, intrinsic :: iso_c_binding ++ implicit none ++ integer(kind=c_int) :: flag ++ end function ++ end interface ++ ++ interface ++ function cuda_hostRegisterDefault_c() result(flag) & ++ bind(C, name="cudaHostRegisterDefaultFromC") ++ use, intrinsic :: iso_c_binding ++ implicit none ++ integer(kind=c_int) :: flag ++ end function ++ end interface ++ ++ interface ++ function cuda_hostRegisterPortable_c() result(flag) & ++ bind(C, name="cudaHostRegisterPortableFromC") ++ use, intrinsic :: iso_c_binding ++ implicit none ++ integer(kind=c_int) :: flag ++ end function ++ end interface ++ ++ interface ++ function cuda_hostRegisterMapped_c() result(flag) & ++ bind(C, name="cudaHostRegisterMappedFromC") ++ use, intrinsic :: iso_c_binding ++ implicit none ++ integer(kind=c_int) :: flag ++ end function ++ end interface ++ ++ interface ++ function cuda_memcpy_intptr_c(dst, src, size, dir) result(istat) & ++ bind(C, name="cudaMemcpyFromC") ++ ++ use, intrinsic :: iso_c_binding ++ ++ implicit none ++ integer(kind=C_intptr_t), value :: dst ++ integer(kind=C_intptr_t), value :: src ++ integer(kind=c_intptr_t), intent(in), value :: size ++ integer(kind=C_INT), intent(in), value :: dir ++ integer(kind=C_INT) :: istat ++ ++ end function cuda_memcpy_intptr_c ++ end interface ++ ++ interface ++ function cuda_memcpy_cptr_c(dst, src, size, dir) result(istat) & ++ bind(C, name="cudaMemcpyFromC") ++ ++ use, intrinsic :: iso_c_binding ++ ++ implicit none ++ type(c_ptr), value :: dst ++ type(c_ptr), value :: src ++ integer(kind=c_intptr_t), intent(in), value :: size ++ integer(kind=C_INT), intent(in), value :: dir ++ integer(kind=C_INT) :: istat ++ ++ end function cuda_memcpy_cptr_c ++ end interface ++ ++ interface ++ function cuda_memcpy_mixed_c(dst, src, size, dir) result(istat) & ++ bind(C, 
name="cudaMemcpyFromC") ++ ++ use, intrinsic :: iso_c_binding ++ ++ implicit none ++ type(c_ptr), value :: dst ++ integer(kind=C_intptr_t), value :: src ++ integer(kind=c_intptr_t), intent(in), value :: size ++ integer(kind=C_INT), intent(in), value :: dir ++ integer(kind=C_INT) :: istat ++ ++ end function cuda_memcpy_mixed_c ++ end interface ++ ++ ! functions to allocate and free CUDA memory ++ ++ interface ++ function cuda_free_intptr_c(a) result(istat) & ++ bind(C, name="cudaFreeFromC") ++ ++ use, intrinsic :: iso_c_binding ++ ++ implicit none ++ integer(kind=C_intptr_T), value :: a ++ integer(kind=C_INT) :: istat ++ ++ end function cuda_free_intptr_c ++ end interface ++ ++ interface ++ function cuda_free_cptr_c(a) result(istat) & ++ bind(C, name="cudaFreeFromC") ++ ++ use, intrinsic :: iso_c_binding ++ ++ implicit none ++ type(c_ptr), value :: a ++ integer(kind=C_INT) :: istat ++ ++ end function cuda_free_cptr_c ++ end interface ++ ++ interface cuda_memcpy ++ module procedure cuda_memcpy_intptr ++ module procedure cuda_memcpy_cptr ++ module procedure cuda_memcpy_mixed ++ end interface ++ ++ interface cuda_free ++ module procedure cuda_free_intptr ++ module procedure cuda_free_cptr ++ end interface ++ ++ interface ++ function cuda_malloc_intptr_c(a, width_height) result(istat) & ++ bind(C, name="cudaMallocFromC") ++ ++ use, intrinsic :: iso_c_binding ++ implicit none ++ ++ integer(kind=C_intptr_T) :: a ++ integer(kind=c_intptr_t), intent(in), value :: width_height ++ integer(kind=C_INT) :: istat ++ ++ end function cuda_malloc_intptr_c ++ end interface ++ ++ interface ++ function cuda_malloc_cptr_c(a, width_height) result(istat) & ++ bind(C, name="cudaMallocFromC") ++ ++ use, intrinsic :: iso_c_binding ++ implicit none ++ ++ type(c_ptr) :: a ++ integer(kind=c_intptr_t), intent(in), value :: width_height ++ integer(kind=C_INT) :: istat ++ ++ end function cuda_malloc_cptr_c ++ end interface ++ ++ !interface cuda_malloc ++ ! module procedure cuda_malloc_intptr ++ ! module procedure cuda_malloc_cptr ++ !end interface ++ ++ contains ++ ++ function cuda_setdevice(n) result(success) ++ use, intrinsic :: iso_c_binding ++ ++ implicit none ++ ++ integer(kind=ik), intent(in) :: n ++ logical :: success ++#ifdef WITH_NVIDIA_GPU_VERSION ++ success = cuda_setdevice_c(int(n,kind=c_int)) /= 0 ++#else ++ success = .true. ++#endif ++ end function cuda_setdevice ++ ++ ++ function cuda_malloc_intptr(a, width_height) result(success) ++ ++ use, intrinsic :: iso_c_binding ++ implicit none ++ ++ integer(kind=c_intptr_t) :: a ++ integer(kind=c_intptr_t), intent(in) :: width_height ++ logical :: success ++#ifdef WITH_NVIDIA_GPU_VERSION ++ success = cuda_malloc_intptr_c(a, width_height) /= 0 ++#else ++ success = .true. ++#endif ++ end function ++ ++ ++ function cuda_malloc_cptr(a, width_height) result(success) ++ ++ use, intrinsic :: iso_c_binding ++ implicit none ++ ++ type(c_ptr) :: a ++ integer(kind=c_intptr_t), intent(in) :: width_height ++ logical :: success ++#ifdef WITH_NVIDIA_GPU_VERSION ++ success = cuda_malloc_cptr_c(a, width_height) /= 0 ++#else ++ success = .true. ++#endif ++ end function ++ ++ function cuda_free_intptr(a) result(success) ++ ++ use, intrinsic :: iso_c_binding ++ ++ implicit none ++ integer(kind=C_intptr_T) :: a ++ logical :: success ++#ifdef WITH_NVIDIA_GPU_VERSION ++ success = cuda_free_intptr_c(a) /= 0 ++#else ++ success = .true. 
++#endif ++ end function cuda_free_intptr ++ ++ function cuda_free_cptr(a) result(success) ++ ++ use, intrinsic :: iso_c_binding ++ ++ implicit none ++ type(c_ptr) :: a ++ logical :: success ++#ifdef WITH_NVIDIA_GPU_VERSION ++ success = cuda_free_cptr_c(a) /= 0 ++#else ++ success = .true. ++#endif ++ end function cuda_free_cptr ++ ++ ! functions to memcopy CUDA memory ++ ++ function cuda_memcpyDeviceToDevice() result(flag) ++ use, intrinsic :: iso_c_binding ++ implicit none ++ integer(kind=ik) :: flag ++#ifdef WITH_NVIDIA_GPU_VERSION ++ flag = int(cuda_memcpyDeviceToDevice_c()) ++#else ++ flag = 0 ++#endif ++ end function ++ ++ function cuda_memcpyHostToDevice() result(flag) ++ use, intrinsic :: iso_c_binding ++ use precision_for_tests ++ implicit none ++ integer(kind=ik) :: flag ++#ifdef WITH_NVIDIA_GPU_VERSION ++ flag = int(cuda_memcpyHostToDevice_c()) ++#else ++ flag = 0 ++#endif ++ end function ++ ++ function cuda_memcpyDeviceToHost() result(flag) ++ use, intrinsic :: iso_c_binding ++ use precision_for_tests ++ implicit none ++ integer(kind=ik) :: flag ++#ifdef WITH_NVIDIA_GPU_VERSION ++ flag = int( cuda_memcpyDeviceToHost_c()) ++#else ++ flag = 0 ++#endif ++ end function ++ ++ function cuda_hostRegisterDefault() result(flag) ++ use, intrinsic :: iso_c_binding ++ use precision_for_tests ++ implicit none ++ integer(kind=ik) :: flag ++#ifdef WITH_NVIDIA_GPU_VERSION ++ flag = int(cuda_hostRegisterDefault_c()) ++#else ++ flag = 0 ++#endif ++ end function ++ ++ function cuda_hostRegisterPortable() result(flag) ++ use, intrinsic :: iso_c_binding ++ use precision_for_tests ++ implicit none ++ integer(kind=ik) :: flag ++#ifdef WITH_NVIDIA_GPU_VERSION ++ flag = int(cuda_hostRegisterPortable_c()) ++#else ++ flag = 0 ++#endif ++ end function ++ ++ function cuda_hostRegisterMapped() result(flag) ++ use, intrinsic :: iso_c_binding ++ use precision_for_tests ++ implicit none ++ integer(kind=ik) :: flag ++#ifdef WITH_NVIDIA_GPU_VERSION ++ flag = int(cuda_hostRegisterMapped_c()) ++#else ++ flag = 0 ++#endif ++ end function ++ ++ function cuda_memcpy_intptr(dst, src, size, dir) result(success) ++ ++ use, intrinsic :: iso_c_binding ++ ++ implicit none ++ integer(kind=C_intptr_t) :: dst ++ integer(kind=C_intptr_t) :: src ++ integer(kind=c_intptr_t), intent(in) :: size ++ integer(kind=C_INT), intent(in) :: dir ++ logical :: success ++ ++#ifdef WITH_NVIDIA_GPU_VERSION ++ success = cuda_memcpy_intptr_c(dst, src, size, dir) /= 0 ++#else ++ success = .true. ++#endif ++ end function ++ ++ function cuda_memcpy_cptr(dst, src, size, dir) result(success) ++ ++ use, intrinsic :: iso_c_binding ++ ++ implicit none ++ type(c_ptr) :: dst ++ type(c_ptr) :: src ++ integer(kind=c_intptr_t), intent(in) :: size ++ integer(kind=C_INT), intent(in) :: dir ++ logical :: success ++ ++#ifdef WITH_NVIDIA_GPU_VERSION ++ success = cuda_memcpy_cptr_c(dst, src, size, dir) /= 0 ++#else ++ !success = .true. ++ success = .false. ++#endif ++ end function ++ ++ function cuda_memcpy_mixed(dst, src, size, dir) result(success) ++ ++ use, intrinsic :: iso_c_binding ++ ++ implicit none ++ type(c_ptr) :: dst ++ integer(kind=C_intptr_t) :: src ++ integer(kind=c_intptr_t), intent(in) :: size ++ integer(kind=C_INT), intent(in) :: dir ++ logical :: success ++ ++#ifdef WITH_NVIDIA_GPU_VERSION ++ success = cuda_memcpy_mixed_c(dst, src, size, dir) /= 0 ++#else ++ success = .true. 
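++    ! note: unlike this stub, cuda_memcpy_cptr above returns .false. in the
++    ! non-GPU case; CPU-only callers should not rely on its return value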
++#endif ++ end function ++ ++end module test_cuda_functions +diff -ruN elpa-new_release_2021.11.001/examples/shared/GPU/CUDA/test_cudaFunctions.cu elpa-new_release_2021.11.001_ok/examples/shared/GPU/CUDA/test_cudaFunctions.cu +--- elpa-new_release_2021.11.001/examples/shared/GPU/CUDA/test_cudaFunctions.cu 1970-01-01 01:00:00.000000000 +0100 ++++ elpa-new_release_2021.11.001_ok/examples/shared/GPU/CUDA/test_cudaFunctions.cu 2022-01-26 10:10:58.320705000 +0100 +@@ -0,0 +1,152 @@ ++// ++// Copyright 2014, A. Marek ++// ++// This file is part of ELPA. ++// ++// The ELPA library was originally created by the ELPA consortium, ++// consisting of the following organizations: ++// ++// - Max Planck Computing and Data Facility (MPCDF), formerly known as ++// Rechenzentrum Garching der Max-Planck-Gesellschaft (RZG), ++// - Bergische Universität Wuppertal, Lehrstuhl für angewandte ++// Informatik, ++// - Technische Universität München, Lehrstuhl für Informatik mit ++// Schwerpunkt Wissenschaftliches Rechnen , ++// - Fritz-Haber-Institut, Berlin, Abt. Theorie, ++// - Max-Plack-Institut für Mathematik in den Naturwissenschaften, ++// Leipzig, Abt. Komplexe Strukutren in Biologie und Kognition, ++// and ++// - IBM Deutschland GmbH ++// ++// This particular source code file contains additions, changes and ++// enhancements authored by Intel Corporation which is not part of ++// the ELPA consortium. ++// ++// More information can be found here: ++// http://elpa.mpcdf.mpg.de/ ++// ++// ELPA is free software: you can redistribute it and/or modify ++// it under the terms of the version 3 of the license of the ++// GNU Lesser General Public License as published by the Free ++// Software Foundation. ++// ++// ELPA is distributed in the hope that it will be useful, ++// but WITHOUT ANY WARRANTY; without even the implied warranty of ++// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++// GNU Lesser General Public License for more details. ++// ++// You should have received a copy of the GNU Lesser General Public License ++// along with ELPA. If not, see <http://www.gnu.org/licenses/> ++// ++// ELPA reflects a substantial effort on the part of the original ++// ELPA consortium, and we ask you to respect the spirit of the ++// license that we chose: i.e., please contribute any changes you ++// may have back to the original ELPA library distribution, and keep ++// any derivatives of ELPA under the same license that we chose for ++// the original distribution, the GNU Lesser General Public License. ++// ++// ++// -------------------------------------------------------------------------------------------------- ++// ++// This file was written by A. Marek, MPCDF ++#include "config-f90.h" ++ ++#include <stdio.h> ++#include <math.h> ++#include <stdio.h> ++ ++#include <stdlib.h> ++#include <string.h> ++#include <time.h> ++#include <alloca.h> ++#include <stdint.h> ++#include <complex.h> ++#ifdef WITH_NVIDIA_GPU_VERSION ++#include <cublas_v2.h> ++#endif ++ ++ ++#define errormessage(x, ...) do { fprintf(stderr, "%s:%d " x, __FILE__, __LINE__, __VA_ARGS__ ); } while (0) ++ ++#ifdef DEBUG_CUDA ++#define debugmessage(x, ...) do { fprintf(stderr, "%s:%d " x, __FILE__, __LINE__, __VA_ARGS__ ); } while (0) ++#else ++#define debugmessage(x, ...) 
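++// with DEBUG_CUDA unset the macro expands to nothing, so the debug
++// output is compiled away entirely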
++#endif ++ ++#ifdef WITH_NVIDIA_GPU_VERSION ++extern "C" { ++ ++ int cudaSetDeviceFromC(int n) { ++ ++ cudaError_t cuerr = cudaSetDevice(n); ++ if (cuerr != cudaSuccess) { ++ errormessage("Error in cudaSetDevice: %s\n",cudaGetErrorString(cuerr)); ++ return 0; ++ } ++ return 1; ++ } ++ ++ int cudaMallocFromC(intptr_t *a, size_t width_height) { ++ ++ cudaError_t cuerr = cudaMalloc((void **) a, width_height); ++#ifdef DEBUG_CUDA ++ printf("CUDA Malloc, pointer address: %p, size: %d \n", *a, width_height); ++#endif ++ if (cuerr != cudaSuccess) { ++ errormessage("Error in cudaMalloc: %s\n",cudaGetErrorString(cuerr)); ++ return 0; ++ } ++ return 1; ++ } ++ ++ int cudaFreeFromC(intptr_t *a) { ++#ifdef DEBUG_CUDA ++ printf("CUDA Free, pointer address: %p \n", a); ++#endif ++ cudaError_t cuerr = cudaFree(a); ++ ++ if (cuerr != cudaSuccess) { ++ errormessage("Error in cudaFree: %s\n",cudaGetErrorString(cuerr)); ++ return 0; ++ } ++ return 1; ++ } ++ ++ int cudaMemcpyFromC(intptr_t *dest, intptr_t *src, size_t count, int dir) { ++ ++ cudaError_t cuerr = cudaMemcpy( dest, src, count, (cudaMemcpyKind)dir); ++ if (cuerr != cudaSuccess) { ++ errormessage("Error in cudaMemcpy: %s\n",cudaGetErrorString(cuerr)); ++ return 0; ++ } ++ return 1; ++ } ++ ++ int cudaMemcpyDeviceToDeviceFromC(void) { ++ int val = cudaMemcpyDeviceToDevice; ++ return val; ++ } ++ int cudaMemcpyHostToDeviceFromC(void) { ++ int val = cudaMemcpyHostToDevice; ++ return val; ++ } ++ int cudaMemcpyDeviceToHostFromC(void) { ++ int val = cudaMemcpyDeviceToHost; ++ return val; ++ } ++ int cudaHostRegisterDefaultFromC(void) { ++ int val = cudaHostRegisterDefault; ++ return val; ++ } ++ int cudaHostRegisterPortableFromC(void) { ++ int val = cudaHostRegisterPortable; ++ return val; ++ } ++ int cudaHostRegisterMappedFromC(void) { ++ int val = cudaHostRegisterMapped; ++ return val; ++ } ++ ++} ++#endif /* TEST_NVIDIA_GPU == 1 */ +diff -ruN elpa-new_release_2021.11.001/examples/shared/GPU/ROCm/test_hip.F90 elpa-new_release_2021.11.001_ok/examples/shared/GPU/ROCm/test_hip.F90 +--- elpa-new_release_2021.11.001/examples/shared/GPU/ROCm/test_hip.F90 1970-01-01 01:00:00.000000000 +0100 ++++ elpa-new_release_2021.11.001_ok/examples/shared/GPU/ROCm/test_hip.F90 2022-01-26 10:10:58.327011000 +0100 +@@ -0,0 +1,449 @@ ++! Copyright 2021, A. Marek ++! ++! This file is part of ELPA. ++! ++! The ELPA library was originally created by the ELPA consortium, ++! consisting of the following organizations: ++! ++! - Rechenzentrum Garching der Max-Planck-Gesellschaft (RZG), ++! - Bergische Universität Wuppertal, Lehrstuhl für angewandte ++! Informatik, ++! - Technische Universität München, Lehrstuhl für Informatik mit ++! Schwerpunkt Wissenschaftliches Rechnen , ++! - Fritz-Haber-Institut, Berlin, Abt. Theorie, ++! - Max-Plack-Institut für Mathematik in den Naturwissenschaften, ++! Leipzig, Abt. Komplexe Strukutren in Biologie und Kognition, ++! and ++! - IBM Deutschland GmbH ++! ++! ++! More information can be found here: ++! http://elpa.mpcdf.mpg.de/ ++! ++! ELPA is free software: you can redistribute it and/or modify ++! it under the terms of the version 3 of the license of the ++! GNU Lesser General Public License as published by the Free ++! Software Foundation. ++! ++! ELPA is distributed in the hope that it will be useful, ++! but WITHOUT ANY WARRANTY; without even the implied warranty of ++! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++! GNU Lesser General Public License for more details. ++! ++! 
You should have received a copy of the GNU Lesser General Public License ++! along with ELPA. If not, see <http://www.gnu.org/licenses/> ++! ++! ELPA reflects a substantial effort on the part of the original ++! ELPA consortium, and we ask you to respect the spirit of the ++! license that we chose: i.e., please contribute any changes you ++! may have back to the original ELPA library distribution, and keep ++! any derivatives of ELPA under the same license that we chose for ++! the original distribution, the GNU Lesser General Public License. ++! ++! This file was written by A. Marek, MPCDF ++ ++ ++#include "config-f90.h" ++module test_hip_functions ++ use, intrinsic :: iso_c_binding ++ use precision_for_tests ++ implicit none ++ ++ public ++ ++ integer(kind=ik) :: hipMemcpyHostToDevice ++ integer(kind=ik) :: hipMemcpyDeviceToHost ++ integer(kind=ik) :: hipMemcpyDeviceToDevice ++ integer(kind=ik) :: hipHostRegisterDefault ++ integer(kind=ik) :: hipHostRegisterPortable ++ integer(kind=ik) :: hipHostRegisterMapped ++ ++ ! TODO global variable, has to be changed ++ integer(kind=C_intptr_T) :: rocblasHandle = -1 ++ ++ ! functions to set and query the CUDA devices ++ interface ++ function hip_setdevice_c(n) result(istat) & ++ bind(C, name="hipSetDeviceFromC") ++ ++ use, intrinsic :: iso_c_binding ++ implicit none ++ integer(kind=C_INT), value :: n ++ integer(kind=C_INT) :: istat ++ end function hip_setdevice_c ++ end interface ++ ++ ! functions to copy CUDA memory ++ interface ++ function hip_memcpyDeviceToDevice_c() result(flag) & ++ bind(C, name="hipMemcpyDeviceToDeviceFromC") ++ use, intrinsic :: iso_c_binding ++ implicit none ++ integer(kind=c_int) :: flag ++ end function ++ end interface ++ ++ interface ++ function hip_memcpyHostToDevice_c() result(flag) & ++ bind(C, name="hipMemcpyHostToDeviceFromC") ++ use, intrinsic :: iso_c_binding ++ implicit none ++ integer(kind=c_int) :: flag ++ end function ++ end interface ++ ++ interface ++ function hip_memcpyDeviceToHost_c() result(flag) & ++ bind(C, name="hipMemcpyDeviceToHostFromC") ++ use, intrinsic :: iso_c_binding ++ implicit none ++ integer(kind=c_int) :: flag ++ end function ++ end interface ++ ++ interface ++ function hip_hostRegisterDefault_c() result(flag) & ++ bind(C, name="hipHostRegisterDefaultFromC") ++ use, intrinsic :: iso_c_binding ++ implicit none ++ integer(kind=c_int) :: flag ++ end function ++ end interface ++ ++ interface ++ function hip_hostRegisterPortable_c() result(flag) & ++ bind(C, name="hipHostRegisterPortableFromC") ++ use, intrinsic :: iso_c_binding ++ implicit none ++ integer(kind=c_int) :: flag ++ end function ++ end interface ++ ++ interface ++ function hip_hostRegisterMapped_c() result(flag) & ++ bind(C, name="hipHostRegisterMappedFromC") ++ use, intrinsic :: iso_c_binding ++ implicit none ++ integer(kind=c_int) :: flag ++ end function ++ end interface ++ ++ interface ++ function hip_memcpy_intptr_c(dst, src, size, dir) result(istat) & ++ bind(C, name="hipMemcpyFromC") ++ ++ use, intrinsic :: iso_c_binding ++ ++ implicit none ++ integer(kind=C_intptr_t), value :: dst ++ integer(kind=C_intptr_t), value :: src ++ integer(kind=c_intptr_t), intent(in), value :: size ++ integer(kind=C_INT), intent(in), value :: dir ++ integer(kind=C_INT) :: istat ++ ++ end function hip_memcpy_intptr_c ++ end interface ++ ++ interface ++ function hip_memcpy_cptr_c(dst, src, size, dir) result(istat) & ++ bind(C, name="hipMemcpyFromC") ++ ++ use, intrinsic :: iso_c_binding ++ ++ implicit none ++ type(c_ptr), value :: dst ++ 
type(c_ptr), value :: src ++ integer(kind=c_intptr_t), intent(in), value :: size ++ integer(kind=C_INT), intent(in), value :: dir ++ integer(kind=C_INT) :: istat ++ ++ end function hip_memcpy_cptr_c ++ end interface ++ ++ interface ++ function hip_memcpy_mixed_c(dst, src, size, dir) result(istat) & ++ bind(C, name="hipMemcpyFromC") ++ ++ use, intrinsic :: iso_c_binding ++ ++ implicit none ++ type(c_ptr), value :: dst ++ integer(kind=c_intptr_t), value :: src ++ integer(kind=c_intptr_t), intent(in), value :: size ++ integer(kind=C_INT), intent(in), value :: dir ++ integer(kind=C_INT) :: istat ++ ++ end function hip_memcpy_mixed_c ++ end interface ++ ++ ! functions to allocate and free CUDA memory ++ ++ interface ++ function hip_free_intptr_c(a) result(istat) & ++ bind(C, name="hipFreeFromC") ++ ++ use, intrinsic :: iso_c_binding ++ ++ implicit none ++ integer(kind=C_intptr_T), value :: a ++ integer(kind=C_INT) :: istat ++ ++ end function hip_free_intptr_c ++ end interface ++ ++ interface ++ function hip_free_cptr_c(a) result(istat) & ++ bind(C, name="hipFreeFromC") ++ ++ use, intrinsic :: iso_c_binding ++ ++ implicit none ++ type(c_ptr), value :: a ++ integer(kind=C_INT) :: istat ++ ++ end function hip_free_cptr_c ++ end interface ++ ++ interface hip_memcpy ++ module procedure hip_memcpy_intptr ++ module procedure hip_memcpy_cptr ++ module procedure hip_memcpy_mixed ++ end interface ++ ++ interface hip_free ++ module procedure hip_free_intptr ++ module procedure hip_free_cptr ++ end interface ++ ++ interface ++ function hip_malloc_intptr_c(a, width_height) result(istat) & ++ bind(C, name="hipMallocFromC") ++ ++ use, intrinsic :: iso_c_binding ++ implicit none ++ ++ integer(kind=C_intptr_T) :: a ++ integer(kind=c_intptr_t), intent(in), value :: width_height ++ integer(kind=C_INT) :: istat ++ ++ end function hip_malloc_intptr_c ++ end interface ++ ++ interface ++ function hip_malloc_cptr_c(a, width_height) result(istat) & ++ bind(C, name="hipMallocFromC") ++ ++ use, intrinsic :: iso_c_binding ++ implicit none ++ ++ type(c_ptr) :: a ++ integer(kind=c_intptr_t), intent(in), value :: width_height ++ integer(kind=C_INT) :: istat ++ ++ end function hip_malloc_cptr_c ++ end interface ++ ++ contains ++ ++ function hip_setdevice(n) result(success) ++ use, intrinsic :: iso_c_binding ++ ++ implicit none ++ ++ integer(kind=ik), intent(in) :: n ++ logical :: success ++#ifdef WITH_AMD_GPU_VERSION ++ success = hip_setdevice_c(int(n,kind=c_int)) /= 0 ++#else ++ success = .true. ++#endif ++ end function hip_setdevice ++ ++ ! functions to allocate and free memory ++ ++ function hip_malloc_intptr(a, width_height) result(success) ++ ++ use, intrinsic :: iso_c_binding ++ implicit none ++ ++ integer(kind=C_intptr_t) :: a ++ integer(kind=c_intptr_t), intent(in) :: width_height ++ logical :: success ++#ifdef WITH_AMD_GPU_VERSION ++ success = hip_malloc_intptr_c(a, width_height) /= 0 ++#else ++ success = .true. ++#endif ++ end function ++ ++ function hip_malloc_cptr(a, width_height) result(success) ++ ++ use, intrinsic :: iso_c_binding ++ implicit none ++ ++ type(c_ptr) :: a ++ integer(kind=c_intptr_t), intent(in) :: width_height ++ logical :: success ++#ifdef WITH_AMD_GPU_VERSION ++ success = hip_malloc_cptr_c(a, width_height) /= 0 ++#else ++ success = .true. 
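++    ! as in the CUDA wrapper, the non-GPU stub just reports success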
++#endif ++ end function ++ ++ function hip_free_intptr(a) result(success) ++ ++ use, intrinsic :: iso_c_binding ++ ++ implicit none ++ integer(kind=C_intptr_T) :: a ++ logical :: success ++#ifdef WITH_AMD_GPU_VERSION ++ success = hip_free_intptr_c(a) /= 0 ++#else ++ success = .true. ++#endif ++ end function hip_free_intptr ++ ++ function hip_free_cptr(a) result(success) ++ ++ use, intrinsic :: iso_c_binding ++ ++ implicit none ++ type(c_ptr) :: a ++ logical :: success ++#ifdef WITH_AMD_GPU_VERSION ++ success = hip_free_cptr_c(a) /= 0 ++#else ++ success = .true. ++#endif ++ end function hip_free_cptr ++ ++ ! functions to memcopy CUDA memory ++ ++ function hip_memcpyDeviceToDevice() result(flag) ++ use, intrinsic :: iso_c_binding ++ implicit none ++ integer(kind=ik) :: flag ++#ifdef WITH_AMD_GPU_VERSION ++ flag = int(hip_memcpyDeviceToDevice_c()) ++#else ++ flag = 0 ++#endif ++ end function ++ ++ function hip_memcpyHostToDevice() result(flag) ++ use, intrinsic :: iso_c_binding ++ use precision_for_tests ++ implicit none ++ integer(kind=ik) :: flag ++#ifdef WITH_AMD_GPU_VERSION ++ flag = int(hip_memcpyHostToDevice_c()) ++#else ++ flag = 0 ++#endif ++ end function ++ ++ function hip_memcpyDeviceToHost() result(flag) ++ use, intrinsic :: iso_c_binding ++ use precision_for_tests ++ implicit none ++ integer(kind=ik) :: flag ++#ifdef WITH_AMD_GPU_VERSION ++ flag = int( hip_memcpyDeviceToHost_c()) ++#else ++ flag = 0 ++#endif ++ end function ++ ++ function hip_hostRegisterDefault() result(flag) ++ use, intrinsic :: iso_c_binding ++ use precision_for_tests ++ implicit none ++ integer(kind=ik) :: flag ++#ifdef WITH_AMD_GPU_VERSION ++ flag = int(hip_hostRegisterDefault_c()) ++#else ++ flag = 0 ++#endif ++ end function ++ ++ function hip_hostRegisterPortable() result(flag) ++ use, intrinsic :: iso_c_binding ++ use precision_for_tests ++ implicit none ++ integer(kind=ik) :: flag ++#ifdef WITH_AMD_GPU_VERSION ++ flag = int(hip_hostRegisterPortable_c()) ++#else ++ flag = 0 ++#endif ++ end function ++ ++ function hip_hostRegisterMapped() result(flag) ++ use, intrinsic :: iso_c_binding ++ use precision_for_tests ++ implicit none ++ integer(kind=ik) :: flag ++#ifdef WITH_AMD_GPU_VERSION ++ flag = int(hip_hostRegisterMapped_c()) ++#else ++ flag = 0 ++#endif ++ end function ++ ++ function hip_memcpy_intptr(dst, src, size, dir) result(success) ++ ++ use, intrinsic :: iso_c_binding ++ ++ implicit none ++ integer(kind=C_intptr_t) :: dst ++ integer(kind=C_intptr_t) :: src ++ integer(kind=c_intptr_t), intent(in) :: size ++ integer(kind=C_INT), intent(in) :: dir ++ logical :: success ++ ++#ifdef WITH_AMD_GPU_VERSION ++ success = hip_memcpy_intptr_c(dst, src, size, dir) /= 0 ++#else ++ success = .true. ++#endif ++ end function ++ ++ function hip_memcpy_cptr(dst, src, size, dir) result(success) ++ ++ use, intrinsic :: iso_c_binding ++ ++ implicit none ++ type(c_ptr) :: dst ++ type(c_ptr) :: src ++ integer(kind=c_intptr_t), intent(in) :: size ++ integer(kind=C_INT), intent(in) :: dir ++ logical :: success ++ ++#ifdef WITH_AMD_GPU_VERSION ++ success = hip_memcpy_cptr_c(dst, src, size, dir) /= 0 ++#else ++ success = .true. 
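++    ! note: here the HIP stub reports success, whereas the corresponding
++    ! CUDA stub (cuda_memcpy_cptr) returns .false.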
++#endif ++ end function ++ ++ function hip_memcpy_mixed(dst, src, size, dir) result(success) ++ ++ use, intrinsic :: iso_c_binding ++ ++ implicit none ++ type(c_ptr) :: dst ++ integer(kind=c_intptr_t) :: src ++ integer(kind=c_intptr_t), intent(in) :: size ++ integer(kind=C_INT), intent(in) :: dir ++ logical :: success ++ ++#ifdef WITH_AMD_GPU_VERSION ++ success = hip_memcpy_mixed_c(dst, src, size, dir) /= 0 ++#else ++ success = .true. ++#endif ++ end function ++ ++end module test_hip_functions +diff -ruN elpa-new_release_2021.11.001/examples/shared/GPU/ROCm/test_rocmFunctions.cpp elpa-new_release_2021.11.001_ok/examples/shared/GPU/ROCm/test_rocmFunctions.cpp +--- elpa-new_release_2021.11.001/examples/shared/GPU/ROCm/test_rocmFunctions.cpp 1970-01-01 01:00:00.000000000 +0100 ++++ elpa-new_release_2021.11.001_ok/examples/shared/GPU/ROCm/test_rocmFunctions.cpp 2022-01-26 10:10:58.328343000 +0100 +@@ -0,0 +1,153 @@ ++// ++// Copyright 2021, A. Marek ++// ++// This file is part of ELPA. ++// ++// The ELPA library was originally created by the ELPA consortium, ++// consisting of the following organizations: ++// ++// - Max Planck Computing and Data Facility (MPCDF), formerly known as ++// Rechenzentrum Garching der Max-Planck-Gesellschaft (RZG), ++// - Bergische Universität Wuppertal, Lehrstuhl für angewandte ++// Informatik, ++// - Technische Universität München, Lehrstuhl für Informatik mit ++// Schwerpunkt Wissenschaftliches Rechnen , ++// - Fritz-Haber-Institut, Berlin, Abt. Theorie, ++// - Max-Plack-Institut für Mathematik in den Naturwissenschaften, ++// Leipzig, Abt. Komplexe Strukutren in Biologie und Kognition, ++// and ++// - IBM Deutschland GmbH ++// ++// This particular source code file contains additions, changes and ++// enhancements authored by Intel Corporation which is not part of ++// the ELPA consortium. ++// ++// More information can be found here: ++// http://elpa.mpcdf.mpg.de/ ++// ++// ELPA is free software: you can redistribute it and/or modify ++// it under the terms of the version 3 of the license of the ++// GNU Lesser General Public License as published by the Free ++// Software Foundation. ++// ++// ELPA is distributed in the hope that it will be useful, ++// but WITHOUT ANY WARRANTY; without even the implied warranty of ++// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++// GNU Lesser General Public License for more details. ++// ++// You should have received a copy of the GNU Lesser General Public License ++// along with ELPA. If not, see <http://www.gnu.org/licenses/> ++// ++// ELPA reflects a substantial effort on the part of the original ++// ELPA consortium, and we ask you to respect the spirit of the ++// license that we chose: i.e., please contribute any changes you ++// may have back to the original ELPA library distribution, and keep ++// any derivatives of ELPA under the same license that we chose for ++// the original distribution, the GNU Lesser General Public License. ++// ++// ++// -------------------------------------------------------------------------------------------------- ++// ++// This file was written by A. Marek, MPCDF ++#include "config-f90.h" ++ ++#include <stdio.h> ++#include <math.h> ++#include <stdio.h> ++ ++#include <stdlib.h> ++#include <string.h> ++#include <time.h> ++#include <alloca.h> ++#include <stdint.h> ++#include <complex.h> ++#ifdef WITH_AMD_GPU_VERSION ++//missing header for rocblas ++#include "rocblas.h" ++#include "hip/hip_runtime_api.h" ++#endif ++ ++#define errormessage(x, ...) 
do { fprintf(stderr, "%s:%d " x, __FILE__, __LINE__, __VA_ARGS__ ); } while (0) ++ ++#ifdef DEBUG_HIP ++#define debugmessage(x, ...) do { fprintf(stderr, "%s:%d " x, __FILE__, __LINE__, __VA_ARGS__ ); } while (0) ++#else ++#define debugmessage(x, ...) ++#endif ++ ++#ifdef WITH_AMD_GPU_VERSION ++extern "C" { ++ ++ int hipSetDeviceFromC(int n) { ++ ++ hipError_t hiperr = hipSetDevice(n); ++ if (hiperr != hipSuccess) { ++ errormessage("Error in hipSetDevice: %s\n",hipGetErrorString(hiperr)); ++ return 0; ++ } ++ return 1; ++ } ++ ++ int hipMallocFromC(intptr_t *a, size_t width_height) { ++ ++ hipError_t hiperr = hipMalloc((void **) a, width_height); ++#ifdef DEBUG_HIP ++ printf("HIP Malloc, pointer address: %p, size: %d \n", *a, width_height); ++#endif ++ if (hiperr != hipSuccess) { ++ errormessage("Error in hipMalloc: %s\n",hipGetErrorString(hiperr)); ++ return 0; ++ } ++ return 1; ++ } ++ ++ int hipFreeFromC(intptr_t *a) { ++#ifdef DEBUG_HIP ++ printf("HIP Free, pointer address: %p \n", a); ++#endif ++ hipError_t hiperr = hipFree(a); ++ ++ if (hiperr != hipSuccess) { ++ errormessage("Error in hipFree: %s\n",hipGetErrorString(hiperr)); ++ return 0; ++ } ++ return 1; ++ } ++ ++ int hipMemcpyFromC(intptr_t *dest, intptr_t *src, size_t count, int dir) { ++ ++ hipError_t hiperr = hipMemcpy( dest, src, count, (hipMemcpyKind)dir); ++ if (hiperr != hipSuccess) { ++ errormessage("Error in hipMemcpy: %s\n",hipGetErrorString(hiperr)); ++ return 0; ++ } ++ return 1; ++ } ++ ++ int hipMemcpyDeviceToDeviceFromC(void) { ++ int val = (int)hipMemcpyDeviceToDevice; ++ return val; ++ } ++ int hipMemcpyHostToDeviceFromC(void) { ++ int val = (int)hipMemcpyHostToDevice; ++ return val; ++ } ++ int hipMemcpyDeviceToHostFromC(void) { ++ int val = (int)hipMemcpyDeviceToHost; ++ return val; ++ } ++ int hipHostRegisterDefaultFromC(void) { ++ int val = (int)hipHostRegisterDefault; ++ return val; ++ } ++ int hipHostRegisterPortableFromC(void) { ++ int val = (int)hipHostRegisterPortable; ++ return val; ++ } ++ int hipHostRegisterMappedFromC(void) { ++ int val = (int)hipHostRegisterMapped; ++ return val; ++ } ++ ++} ++#endif /* TEST_AMD_GPU == 1 */ +diff -ruN elpa-new_release_2021.11.001/examples/shared/GPU/test_gpu_vendor_agnostic_layer.F90 elpa-new_release_2021.11.001_ok/examples/shared/GPU/test_gpu_vendor_agnostic_layer.F90 +--- elpa-new_release_2021.11.001/examples/shared/GPU/test_gpu_vendor_agnostic_layer.F90 1970-01-01 01:00:00.000000000 +0100 ++++ elpa-new_release_2021.11.001_ok/examples/shared/GPU/test_gpu_vendor_agnostic_layer.F90 2022-01-26 10:10:58.329439000 +0100 +@@ -0,0 +1,357 @@ ++#if 0 ++! Copyright 2021, A. Marek, MPCDF ++! ++! This file is part of ELPA. ++! ++! The ELPA library was originally created by the ELPA consortium, ++! consisting of the following organizations: ++! ++! - Max Planck Computing and Data Facility (MPCDF), formerly known as ++! Rechenzentrum Garching der Max-Planck-Gesellschaft (RZG), ++! - Bergische Universität Wuppertal, Lehrstuhl für angewandte ++! Informatik, ++! - Technische Universität München, Lehrstuhl für Informatik mit ++! Schwerpunkt Wissenschaftliches Rechnen , ++! - Fritz-Haber-Institut, Berlin, Abt. Theorie, ++! - Max-Plack-Institut für Mathematik in den Naturwissenschaften, ++! Leipzig, Abt. Komplexe Strukutren in Biologie und Kognition, ++! and ++! - IBM Deutschland GmbH ++! ++! This particular source code file contains additions, changes and ++! enhancements authored by Intel Corporation which is not part of ++! the ELPA consortium. ++! ++! 
More information can be found here: ++! http://elpa.mpcdf.mpg.de/ ++! ++! ELPA is free software: you can redistribute it and/or modify ++! it under the terms of the version 3 of the license of the ++! GNU Lesser General Public License as published by the Free ++! Software Foundation. ++! ++! ELPA is distributed in the hope that it will be useful, ++! but WITHOUT ANY WARRANTY; without even the implied warranty of ++! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++! GNU Lesser General Public License for more details. ++! ++! You should have received a copy of the GNU Lesser General Public License ++! along with ELPA. If not, see <http://www.gnu.org/licenses/> ++! ++! ELPA reflects a substantial effort on the part of the original ++! ELPA consortium, and we ask you to respect the spirit of the ++! license that we chose: i.e., please contribute any changes you ++! may have back to the original ELPA library distribution, and keep ++! any derivatives of ELPA under the same license that we chose for ++! the original distribution, the GNU Lesser General Public License. ++#endif ++ ++ ++#include "config-f90.h" ++module test_gpu ++ !use precision_for_tests ++ use precision_for_tests ++ use iso_c_binding ++!#if TEST_INTEL_GPU == 1 ++! use mkl_offload ++!#endif ++ integer(kind=c_int), parameter :: nvidia_gpu = 1 ++ integer(kind=c_int), parameter :: amd_gpu = 2 ++ integer(kind=c_int), parameter :: intel_gpu = 3 ++ integer(kind=c_int), parameter :: no_gpu = -1 ++ integer(kind=c_int) :: use_gpu_vendor ++ integer(kind=c_int) :: gpuHostRegisterDefault ++ integer(kind=c_int) :: gpuMemcpyHostToDevice ++ integer(kind=c_int) :: gpuMemcpyDeviceToHost ++ integer(kind=c_int) :: gpuMemcpyDeviceToDevice ++ integer(kind=c_int) :: gpuHostRegisterMapped ++ integer(kind=c_int) :: gpuHostRegisterPortable ++ ++ integer(kind=c_intptr_t), parameter :: size_of_double_real = 8_rk8 ++#ifdef WANT_SINGLE_PRECISION_REAL ++ integer(kind=c_intptr_t), parameter :: size_of_single_real = 4_rk4 ++#endif ++ ++ integer(kind=c_intptr_t), parameter :: size_of_double_complex = 16_ck8 ++#ifdef WANT_SINGLE_PRECISION_COMPLEX ++ integer(kind=c_intptr_t), parameter :: size_of_single_complex = 8_ck4 ++#endif ++ ++ interface gpu_memcpy ++ module procedure gpu_memcpy_intptr ++ module procedure gpu_memcpy_cptr ++ module procedure gpu_memcpy_mixed ++ end interface ++ ++ interface gpu_malloc ++ module procedure gpu_malloc_intptr ++ module procedure gpu_malloc_cptr ++ end interface ++ ++ interface gpu_free ++ module procedure gpu_free_intptr ++ module procedure gpu_free_cptr ++ end interface ++ ++ contains ++ function gpu_vendor(set_vendor) result(vendor) ++ use precision_for_tests ++ implicit none ++ integer(kind=c_int) :: vendor ++ integer(kind=c_int), intent(in) :: set_vendor ++ ! default ++ vendor = no_gpu ++ if (set_vendor == nvidia_gpu) then ++ vendor = nvidia_gpu ++ endif ++ if (set_vendor == amd_gpu) then ++ vendor = amd_gpu ++ endif ++!#if TEST_INTEL_GPU == 1 ++! 
vendor = intel_gpu ++!#endif ++ use_gpu_vendor = vendor ++ return ++ end function ++ ++ subroutine set_gpu_parameters ++#ifdef WITH_NVIDIA_GPU_VERSION ++ use test_cuda_functions ++#endif ++#ifdef WITH_AMD_GPU_VERSION ++ use test_hip_functions ++#endif ++ implicit none ++ ++#ifdef WITH_NVIDIA_GPU_VERSION ++ if (use_gpu_vendor == nvidia_gpu) then ++ cudaMemcpyHostToDevice = cuda_memcpyHostToDevice() ++ gpuMemcpyHostToDevice = cudaMemcpyHostToDevice ++ cudaMemcpyDeviceToHost = cuda_memcpyDeviceToHost() ++ gpuMemcpyDeviceToHost = cudaMemcpyDeviceToHost ++ cudaMemcpyDeviceToDevice = cuda_memcpyDeviceToDevice() ++ gpuMemcpyDeviceToDevice = cudaMemcpyDeviceToDevice ++ cudaHostRegisterPortable = cuda_hostRegisterPortable() ++ gpuHostRegisterPortable = cudaHostRegisterPortable ++ cudaHostRegisterMapped = cuda_hostRegisterMapped() ++ gpuHostRegisterMapped = cudaHostRegisterMapped ++ cudaHostRegisterDefault = cuda_hostRegisterDefault() ++ gpuHostRegisterDefault = cudaHostRegisterDefault ++ endif ++#endif ++ ++#ifdef WITH_AMD_GPU_VERSION ++ if (use_gpu_vendor == amd_gpu) then ++ hipMemcpyHostToDevice = hip_memcpyHostToDevice() ++ gpuMemcpyHostToDevice = hipMemcpyHostToDevice ++ hipMemcpyDeviceToHost = hip_memcpyDeviceToHost() ++ gpuMemcpyDeviceToHost = hipMemcpyDeviceToHost ++ hipMemcpyDeviceToDevice = hip_memcpyDeviceToDevice() ++ gpuMemcpyDeviceToDevice = hipMemcpyDeviceToDevice ++ hipHostRegisterPortable = hip_hostRegisterPortable() ++ gpuHostRegisterPortable = hipHostRegisterPortable ++ hipHostRegisterMapped = hip_hostRegisterMapped() ++ gpuHostRegisterMapped = hipHostRegisterMapped ++ hipHostRegisterDefault = hip_hostRegisterDefault() ++ gpuHostRegisterDefault = hipHostRegisterDefault ++ endif ++#endif ++ ++ end subroutine ++ ++ function gpu_malloc_intptr(array, elements) result(success) ++ use, intrinsic :: iso_c_binding ++#ifdef WITH_NVIDIA_GPU_VERSION ++ use test_cuda_functions ++#endif ++#ifdef WITH_AMD_GPU_VERSION ++ use test_hip_functions ++#endif ++ implicit none ++ integer(kind=C_intptr_T) :: array ++ integer(kind=c_intptr_t), intent(in) :: elements ++ logical :: success ++ ++#ifdef WITH_NVIDIA_GPU_VERSION ++ if (use_gpu_vendor == nvidia_gpu) then ++ success = cuda_malloc_intptr(array, elements) ++ endif ++#endif ++#ifdef WITH_AMD_GPU_VERSION ++ if (use_gpu_vendor == amd_gpu) then ++ success = hip_malloc_intptr(array, elements) ++ endif ++#endif ++ ++ end function ++ ++ function gpu_malloc_cptr(array, elements) result(success) ++ use, intrinsic :: iso_c_binding ++#ifdef WITH_NVIDIA_GPU_VERSION ++ use test_cuda_functions ++#endif ++#ifdef WITH_AMD_GPU_VERSION ++ use test_hip_functions ++#endif ++ implicit none ++ type(c_ptr) :: array ++ integer(kind=c_intptr_t), intent(in) :: elements ++ logical :: success ++ success = .false. 
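++    ! preset the result to .false. so that a build without a matching GPU
++    ! backend falls through as failure (gpu_malloc_intptr above leaves
++    ! success undefined in that case)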
++ ++#ifdef WITH_NVIDIA_GPU_VERSION ++ if (use_gpu_vendor == nvidia_gpu) then ++ success = cuda_malloc_cptr(array, elements) ++ endif ++#endif ++ ++#ifdef WITH_AMD_GPU_VERSION ++ if (use_gpu_vendor == amd_gpu) then ++ success = hip_malloc_cptr(array, elements) ++ endif ++#endif ++ ++ end function ++ ++ function gpu_memcpy_intptr(dst, src, size, dir) result(success) ++ use, intrinsic :: iso_c_binding ++#ifdef WITH_NVIDIA_GPU_VERSION ++ use test_cuda_functions ++#endif ++#ifdef WITH_AMD_GPU_VERSION ++ use test_hip_functions ++#endif ++ implicit none ++ integer(kind=C_intptr_t) :: dst ++ integer(kind=C_intptr_t) :: src ++ integer(kind=c_intptr_t), intent(in) :: size ++ integer(kind=C_INT), intent(in) :: dir ++ logical :: success ++ ++#ifdef WITH_NVIDIA_GPU_VERSION ++ if (use_gpu_vendor == nvidia_gpu) then ++ success = cuda_memcpy_intptr(dst, src, size, dir) ++ endif ++#endif ++ ++#ifdef WITH_AMD_GPU_VERSION ++ if (use_gpu_vendor == amd_gpu) then ++ success = hip_memcpy_intptr(dst, src, size, dir) ++ endif ++#endif ++ ++ end function ++ ++ function gpu_memcpy_cptr(dst, src, size, dir) result(success) ++ use, intrinsic :: iso_c_binding ++#ifdef WITH_NVIDIA_GPU_VERSION ++ use test_cuda_functions ++#endif ++#ifdef WITH_AMD_GPU_VERSION ++ use test_hip_functions ++#endif ++ implicit none ++ type(c_ptr) :: dst ++ type(c_ptr) :: src ++ integer(kind=c_intptr_t), intent(in) :: size ++ integer(kind=C_INT), intent(in) :: dir ++ logical :: success ++ ++#ifdef WITH_NVIDIA_GPU_VERSION ++ if (use_gpu_vendor == nvidia_gpu) then ++ success = cuda_memcpy_cptr(dst, src, size, dir) ++ endif ++#endif ++ ++#ifdef WITH_AMD_GPU_VERSION ++ if (use_gpu_vendor == amd_gpu) then ++ success = hip_memcpy_cptr(dst, src, size, dir) ++ endif ++#endif ++ ++ end function ++ ++ function gpu_memcpy_mixed(dst, src, size, dir) result(success) ++ use, intrinsic :: iso_c_binding ++#ifdef WITH_NVIDIA_GPU_VERSION ++ use test_cuda_functions ++#endif ++#ifdef WITH_AMD_GPU_VERSION ++ use test_hip_functions ++#endif ++ implicit none ++ type(c_ptr) :: dst ++ integer(kind=C_intptr_t) :: src ++ integer(kind=c_intptr_t), intent(in) :: size ++ integer(kind=C_INT), intent(in) :: dir ++ logical :: success ++ ++#ifdef WITH_NVIDIA_GPU_VERSION ++ if (use_gpu_vendor == nvidia_gpu) then ++ success = cuda_memcpy_mixed(dst, src, size, dir) ++ endif ++#endif ++ ++#ifdef WITH_AMD_GPU_VERSION ++ if (use_gpu_vendor == amd_gpu) then ++ success = hip_memcpy_mixed(dst, src, size, dir) ++ endif ++#endif ++ ++ end function ++ ++ function gpu_free_intptr(a) result(success) ++ use, intrinsic :: iso_c_binding ++#ifdef WITH_NVIDIA_GPU_VERSION ++ use test_cuda_functions ++#endif ++#ifdef WITH_AMD_GPU_VERSION ++ use test_hip_functions ++#endif ++ implicit none ++ integer(kind=c_intptr_t) :: a ++ ++ logical :: success ++ ++#ifdef WITH_NVIDIA_GPU_VERSION ++ if (use_gpu_vendor == nvidia_gpu) then ++ success = cuda_free_intptr(a) ++ endif ++#endif ++ ++#ifdef WITH_AMD_GPU_VERSION ++ if (use_gpu_vendor == amd_gpu) then ++ success = hip_free_intptr(a) ++ endif ++#endif ++ ++ end function ++ ++ function gpu_free_cptr(a) result(success) ++ use, intrinsic :: iso_c_binding ++#ifdef WITH_NVIDIA_GPU_VERSION ++ use test_cuda_functions ++#endif ++#ifdef WITH_AMD_GPU_VERSION ++ use test_hip_functions ++#endif ++ implicit none ++ type(c_ptr) :: a ++ ++ logical :: success ++ ++#ifdef WITH_NVIDIA_GPU_VERSION ++ if (use_gpu_vendor == nvidia_gpu) then ++ success = cuda_free_cptr(a) ++ endif ++#endif ++ ++#ifdef WITH_AMD_GPU_VERSION ++ if (use_gpu_vendor == amd_gpu) then ++ success = 
hip_free_cptr(a) ++ endif ++#endif ++ ++ end function ++ ++end module +diff -ruN elpa-new_release_2021.11.001/examples/shared/mod_tests_blas_interfaces.F90 elpa-new_release_2021.11.001_ok/examples/shared/mod_tests_blas_interfaces.F90 +--- elpa-new_release_2021.11.001/examples/shared/mod_tests_blas_interfaces.F90 1970-01-01 01:00:00.000000000 +0100 ++++ elpa-new_release_2021.11.001_ok/examples/shared/mod_tests_blas_interfaces.F90 2022-01-26 10:10:58.330923000 +0100 +@@ -0,0 +1,53 @@ ++! This file is part of ELPA. ++! ++! The ELPA library was originally created by the ELPA consortium, ++! consisting of the following organizations: ++! ++! - Rechenzentrum Garching der Max-Planck-Gesellschaft (RZG), ++! - Bergische Universität Wuppertal, Lehrstuhl für angewandte ++! Informatik, ++! - Technische Universität München, Lehrstuhl für Informatik mit ++! Schwerpunkt Wissenschaftliches Rechnen , ++! - Fritz-Haber-Institut, Berlin, Abt. Theorie, ++! - Max-Plack-Institut für Mathematik in den Naturwissenschaften, ++! Leipzig, Abt. Komplexe Strukutren in Biologie und Kognition, ++! and ++! - IBM Deutschland GmbH ++! ++! ++! More information can be found here: ++! https://elpa.mpcdf.mpg.de/ ++! ++! ELPA is free software: you can redistribute it and/or modify ++! it under the terms of the version 3 of the license of the ++! GNU Lesser General Public License as published by the Free ++! Software Foundation. ++! ++! ELPA is distributed in the hope that it will be useful, ++! but WITHOUT ANY WARRANTY; without even the implied warranty of ++! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++! GNU Lesser General Public License for more details. ++! ++! You should have received a copy of the GNU Lesser General Public License ++! along with ELPA. If not, see <http://www.gnu.org/licenses/> ++! ++! ELPA reflects a substantial effort on the part of the original ++! ELPA consortium, and we ask you to respect the spirit of the ++! license that we chose: i.e., please contribute any changes you ++! may have back to the original ELPA library distribution, and keep ++! any derivatives of ELPA under the same license that we chose for ++! the original distribution, the GNU Lesser General Public License. ++! ++! This file was written by A. Marek, MPCDF ++ ++#include "config-f90.h" ++#define PRECISION_MODULE precision_for_tests ++module tests_blas_interfaces ++ use iso_c_binding ++ use precision_for_tests ++ ++ implicit none ++ ++#include "../../src/helpers/fortran_blas_interfaces.F90" ++ ++end module +diff -ruN elpa-new_release_2021.11.001/examples/shared/mod_tests_scalapack_interfaces.F90 elpa-new_release_2021.11.001_ok/examples/shared/mod_tests_scalapack_interfaces.F90 +--- elpa-new_release_2021.11.001/examples/shared/mod_tests_scalapack_interfaces.F90 1970-01-01 01:00:00.000000000 +0100 ++++ elpa-new_release_2021.11.001_ok/examples/shared/mod_tests_scalapack_interfaces.F90 2022-01-26 10:10:58.331765000 +0100 +@@ -0,0 +1,56 @@ ++! This file is part of ELPA. ++! ++! The ELPA library was originally created by the ELPA consortium, ++! consisting of the following organizations: ++! ++! - Rechenzentrum Garching der Max-Planck-Gesellschaft (RZG), ++! - Bergische Universität Wuppertal, Lehrstuhl für angewandte ++! Informatik, ++! - Technische Universität München, Lehrstuhl für Informatik mit ++! Schwerpunkt Wissenschaftliches Rechnen , ++! - Fritz-Haber-Institut, Berlin, Abt. Theorie, ++! - Max-Plack-Institut für Mathematik in den Naturwissenschaften, ++! Leipzig, Abt. 
Komplexe Strukutren in Biologie und Kognition, ++! and ++! - IBM Deutschland GmbH ++! ++! ++! More information can be found here: ++! https://elpa.mpcdf.mpg.de/ ++! ++! ELPA is free software: you can redistribute it and/or modify ++! it under the terms of the version 3 of the license of the ++! GNU Lesser General Public License as published by the Free ++! Software Foundation. ++! ++! ELPA is distributed in the hope that it will be useful, ++! but WITHOUT ANY WARRANTY; without even the implied warranty of ++! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++! GNU Lesser General Public License for more details. ++! ++! You should have received a copy of the GNU Lesser General Public License ++! along with ELPA. If not, see <http://www.gnu.org/licenses/> ++! ++! ELPA reflects a substantial effort on the part of the original ++! ELPA consortium, and we ask you to respect the spirit of the ++! license that we chose: i.e., please contribute any changes you ++! may have back to the original ELPA library distribution, and keep ++! any derivatives of ELPA under the same license that we chose for ++! the original distribution, the GNU Lesser General Public License. ++! ++! This file was written by A. Marek, MPCDF ++ ++ ++#include "config-f90.h" ++#define PRECISION_MODULE precision_for_tests ++module tests_scalapack_interfaces ++ use iso_c_binding ++ use precision_for_tests ++ ++ implicit none ++ ++#include "../../src/helpers/fortran_scalapack_interfaces.F90" ++ ++end module ++ ++ +diff -ruN elpa-new_release_2021.11.001/examples/shared/test_analytic.F90 elpa-new_release_2021.11.001_ok/examples/shared/test_analytic.F90 +--- elpa-new_release_2021.11.001/examples/shared/test_analytic.F90 1970-01-01 01:00:00.000000000 +0100 ++++ elpa-new_release_2021.11.001_ok/examples/shared/test_analytic.F90 2022-01-26 10:10:58.332553000 +0100 +@@ -0,0 +1,204 @@ ++! (c) Copyright Pavel Kus, 2017, MPCDF ++! ++! This file is part of ELPA. ++! ++! The ELPA library was originally created by the ELPA consortium, ++! consisting of the following organizations: ++! ++! - Max Planck Computing and Data Facility (MPCDF), formerly known as ++! Rechenzentrum Garching der Max-Planck-Gesellschaft (RZG), ++! - Bergische Universität Wuppertal, Lehrstuhl für angewandte ++! Informatik, ++! - Technische Universität München, Lehrstuhl für Informatik mit ++! Schwerpunkt Wissenschaftliches Rechnen , ++! - Fritz-Haber-Institut, Berlin, Abt. Theorie, ++! - Max-Plack-Institut für Mathematik in den Naturwissenschaften, ++! Leipzig, Abt. Komplexe Strukutren in Biologie und Kognition, ++! and ++! - IBM Deutschland GmbH ++! ++! ++! More information can be found here: ++! http://elpa.mpcdf.mpg.de/ ++! ++! ELPA is free software: you can redistribute it and/or modify ++! it under the terms of the version 3 of the license of the ++! GNU Lesser General Public License as published by the Free ++! Software Foundation. ++! ++! ELPA is distributed in the hope that it will be useful, ++! but WITHOUT ANY WARRANTY; without even the implied warranty of ++! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++! GNU Lesser General Public License for more details. ++! ++! You should have received a copy of the GNU Lesser General Public License ++! along with ELPA. If not, see <http://www.gnu.org/licenses/> ++! ++! ELPA reflects a substantial effort on the part of the original ++! ELPA consortium, and we ask you to respect the spirit of the ++! license that we chose: i.e., please contribute any changes you ++! 
may have back to the original ELPA library distribution, and keep ++! any derivatives of ELPA under the same license that we chose for ++! the original distribution, the GNU Lesser General Public License. ++ ++#include "../Fortran/assert.h" ++#include "config-f90.h" ++ ++#ifdef HAVE_64BIT_INTEGER_MATH_SUPPORT ++#define TEST_INT_TYPE integer(kind=c_int64_t) ++#define INT_TYPE c_int64_t ++#else ++#define TEST_INT_TYPE integer(kind=c_int32_t) ++#define INT_TYPE c_int32_t ++#endif ++#ifdef HAVE_64BIT_INTEGER_MPI_SUPPORT ++#define TEST_INT_MPI_TYPE integer(kind=c_int64_t) ++#define INT_MPI_TYPE c_int64_t ++#else ++#define TEST_INT_MPI_TYPE integer(kind=c_int32_t) ++#define INT_MPI_TYPE c_int32_t ++#endif ++ ++module test_analytic ++ ++ use test_util ++#ifdef HAVE_DETAILED_TIMINGS ++ use ftimings ++#else ++ use timings_dummy ++#endif ++ use precision_for_tests ++ ++ interface prepare_matrix_analytic ++ module procedure prepare_matrix_analytic_complex_double ++ module procedure prepare_matrix_analytic_real_double ++#ifdef WANT_SINGLE_PRECISION_REAL ++ module procedure prepare_matrix_analytic_real_single ++#endif ++#ifdef WANT_SINGLE_PRECISION_COMPLEX ++ module procedure prepare_matrix_analytic_complex_single ++#endif ++ end interface ++ ++ interface check_correctness_analytic ++ module procedure check_correctness_analytic_complex_double ++ module procedure check_correctness_analytic_real_double ++#ifdef WANT_SINGLE_PRECISION_REAL ++ module procedure check_correctness_analytic_real_single ++#endif ++#ifdef WANT_SINGLE_PRECISION_COMPLEX ++ module procedure check_correctness_analytic_complex_single ++#endif ++ end interface ++ ++ ++ interface print_matrix ++ module procedure print_matrix_complex_double ++ module procedure print_matrix_real_double ++#ifdef WANT_SINGLE_PRECISION_REAL ++ module procedure print_matrix_real_single ++#endif ++#ifdef WANT_SINGLE_PRECISION_COMPLEX ++ module procedure print_matrix_complex_single ++#endif ++ end interface ++ ++ TEST_INT_TYPE, parameter, private :: num_primes = 3 ++#ifdef BUILD_FUGAKU ++ TEST_INT_TYPE, private :: primes(num_primes) ++#else ++ TEST_INT_TYPE, parameter, private :: primes(num_primes) = (/2,3,5/) ++#endif ++ ++ TEST_INT_TYPE, parameter, private :: ANALYTIC_MATRIX = 0 ++ TEST_INT_TYPE, parameter, private :: ANALYTIC_EIGENVECTORS = 1 ++ TEST_INT_TYPE, parameter, private :: ANALYTIC_EIGENVALUES = 2 ++ ++ contains ++ ++ function decompose(num, decomposition) result(possible) ++ implicit none ++ TEST_INT_TYPE, intent(in) :: num ++ TEST_INT_TYPE, intent(out) :: decomposition(num_primes) ++ logical :: possible ++ TEST_INT_TYPE :: reminder, prime, prime_id ++ ++#ifdef BUILD_FUGAKU ++ primes(1) = 2 ++ primes(2) = 3 ++ primes(3) = 5 ++#endif ++ decomposition = 0 ++ possible = .true. ++ reminder = num ++ do prime_id = 1, num_primes ++ prime = primes(prime_id) ++ do while (MOD(reminder, prime) == 0) ++ decomposition(prime_id) = decomposition(prime_id) + 1 ++ reminder = reminder / prime ++ end do ++ end do ++ if(reminder > 1) then ++ possible = .false. 
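++      ! a remaining factor means num has a prime factor other than 2, 3 or 5,
++      ! so it cannot be used as a matrix size for the analytic test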
++ end if ++ end function ++ ++ function compose(decomposition) result(num) ++ implicit none ++ TEST_INT_TYPE, intent(in) :: decomposition(num_primes) ++ TEST_INT_TYPE :: num, prime_id ++ ++ num = 1; ++#ifdef BUILD_FUGAKU ++ primes(1) = 2 ++ primes(2) = 3 ++ primes(3) = 5 ++#endif ++ do prime_id = 1, num_primes ++ num = num * primes(prime_id) ** decomposition(prime_id) ++ end do ++ end function ++ ++ ++#include "../../src/general/prow_pcol.F90" ++#include "../../src/general/map_global_to_local.F90" ++ ++ ++#define COMPLEXCASE 1 ++#define DOUBLE_PRECISION 1 ++#include "../../src/general/precision_macros.h" ++#include "test_analytic_template.F90" ++#undef DOUBLE_PRECISION ++#undef COMPLEXCASE ++ ++#ifdef WANT_SINGLE_PRECISION_COMPLEX ++ ++#define COMPLEXCASE 1 ++#define SINGLE_PRECISION 1 ++#include "../../src/general/precision_macros.h" ++#include "test_analytic_template.F90" ++#undef SINGLE_PRECISION ++#undef COMPLEXCASE ++ ++#endif /* WANT_SINGLE_PRECISION_COMPLEX */ ++ ++#define REALCASE 1 ++#define DOUBLE_PRECISION 1 ++#include "../../src/general/precision_macros.h" ++#include "test_analytic_template.F90" ++#undef DOUBLE_PRECISION ++#undef REALCASE ++ ++#ifdef WANT_SINGLE_PRECISION_REAL ++ ++#define REALCASE 1 ++#define SINGLE_PRECISION 1 ++#include "../../src/general/precision_macros.h" ++#include "test_analytic_template.F90" ++#undef SINGLE_PRECISION ++#undef REALCASE ++ ++#endif /* WANT_SINGLE_PRECISION_REAL */ ++ ++ ++end module +diff -ruN elpa-new_release_2021.11.001/examples/shared/test_analytic_template.F90 elpa-new_release_2021.11.001_ok/examples/shared/test_analytic_template.F90 +--- elpa-new_release_2021.11.001/examples/shared/test_analytic_template.F90 1970-01-01 01:00:00.000000000 +0100 ++++ elpa-new_release_2021.11.001_ok/examples/shared/test_analytic_template.F90 2022-01-26 10:10:58.333552000 +0100 +@@ -0,0 +1,701 @@ ++! (c) Copyright Pavel Kus, 2017, MPCDF ++! ++! This file is part of ELPA. ++! ++! The ELPA library was originally created by the ELPA consortium, ++! consisting of the following organizations: ++! ++! - Max Planck Computing and Data Facility (MPCDF), formerly known as ++! Rechenzentrum Garching der Max-Planck-Gesellschaft (RZG), ++! - Bergische Universität Wuppertal, Lehrstuhl für angewandte ++! Informatik, ++! - Technische Universität München, Lehrstuhl für Informatik mit ++! Schwerpunkt Wissenschaftliches Rechnen , ++! - Fritz-Haber-Institut, Berlin, Abt. Theorie, ++! - Max-Plack-Institut für Mathematik in den Naturwissenschaften, ++! Leipzig, Abt. Komplexe Strukutren in Biologie und Kognition, ++! and ++! - IBM Deutschland GmbH ++! ++! ++! More information can be found here: ++! http://elpa.mpcdf.mpg.de/ ++! ++! ELPA is free software: you can redistribute it and/or modify ++! it under the terms of the version 3 of the license of the ++! GNU Lesser General Public License as published by the Free ++! Software Foundation. ++! ++! ELPA is distributed in the hope that it will be useful, ++! but WITHOUT ANY WARRANTY; without even the implied warranty of ++! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++! GNU Lesser General Public License for more details. ++! ++! You should have received a copy of the GNU Lesser General Public License ++! along with ELPA. If not, see <http://www.gnu.org/licenses/> ++! ++! ELPA reflects a substantial effort on the part of the original ++! ELPA consortium, and we ask you to respect the spirit of the ++! license that we chose: i.e., please contribute any changes you ++! 
may have back to the original ELPA library distribution, and keep ++! any derivatives of ELPA under the same license that we chose for ++! the original distribution, the GNU Lesser General Public License. ++ ++#include "config-f90.h" ++ ++#ifdef HAVE_64BIT_INTEGER_MATH_SUPPORT ++#define TEST_INT_TYPE integer(kind=c_int64_t) ++#define INT_TYPE c_int64_t ++#else ++#define TEST_INT_TYPE integer(kind=c_int32_t) ++#define INT_TYPE c_int32_t ++#endif ++#ifdef HAVE_64BIT_INTEGER_MPI_SUPPORT ++#define TEST_INT_MPI_TYPE integer(kind=c_int64_t) ++#define INT_MPI_TYPE c_int64_t ++#else ++#define TEST_INT_MPI_TYPE integer(kind=c_int32_t) ++#define INT_MPI_TYPE c_int32_t ++#endif ++ ++ ++ subroutine prepare_matrix_analytic_& ++ &MATH_DATATYPE& ++ &_& ++ &PRECISION& ++ &(na, a, nblk, myid, np_rows, np_cols, my_prow, my_pcol, print_times) ++ use precision_for_tests ++ ++ implicit none ++ TEST_INT_TYPE, intent(in) :: na, nblk, myid, np_rows, np_cols, my_prow, my_pcol ++ MATH_DATATYPE(kind=REAL_DATATYPE), intent(inout):: a(:,:) ++ logical, optional :: print_times ++ logical :: print_timer ++ TEST_INT_TYPE :: globI, globJ, locI, locJ, pi, pj, levels(num_primes) ++ integer(kind=c_int) :: loc_I, loc_J, p_i, p_j ++#ifdef HAVE_DETAILED_TIMINGS ++ type(timer_t) :: timer ++#else ++ type(timer_dummy_t) :: timer ++#endif ++ ++ call timer%enable() ++ call timer%start("prepare_matrix_analytic") ++ ++ print_timer = .true. ++ ++ if (present(print_times)) then ++ print_timer = print_times ++ endif ++ ++ ! for debug only, do it systematicaly somehow ... unit tests ++ call check_module_sanity_& ++ &MATH_DATATYPE& ++ &_& ++ &PRECISION& ++ &(myid) ++ ++ if(.not. decompose(na, levels)) then ++ if(myid == 0) then ++ print *, "Analytic test can be run only with matrix sizes of the form 2^n * 3^m * 5^o" ++ stop 1 ++ end if ++ end if ++ ++ call timer%start("loop") ++ do globI = 1, na ++ ++ p_i = prow(int(globI,kind=c_int), int(nblk,kind=c_int), int(np_rows,kind=c_int)) ++ pi = int(p_i,kind=INT_TYPE) ++ if (my_prow .ne. pi) cycle ++ ++ do globJ = 1, na ++ ++ p_j = pcol(int(globJ,kind=c_int), int(nblk,kind=c_int), int(np_cols,kind=c_int)) ++ pj = int(p_j,kind=INT_TYPE) ++ if (my_pcol .ne. pj) cycle ++ ++ if(map_global_array_index_to_local_index(int(globI,kind=c_int), int(globJ,kind=c_int), loc_I, loc_J, & ++ int(nblk,kind=c_int), int(np_rows,kind=c_int), int(np_cols,kind=c_int), & ++ int(my_prow,kind=c_int), int(my_pcol,kind=c_int) )) then ++ locI = int(loc_i,kind=INT_TYPE) ++ locJ = int(loc_j,kind=INT_TYPE) ++ call timer%start("evaluation") ++ a(locI, locJ) = analytic_matrix_& ++ &MATH_DATATYPE& ++ &_& ++ &PRECISION& ++ &(na, globI, globJ) ++ call timer%stop("evaluation") ++ else ++ print *, "Warning ... error in preparation loop of the analytic test" ++ end if ++ end do ++ end do ++ call timer%stop("loop") ++ ++ call timer%stop("prepare_matrix_analytic") ++ if(myid == 0 .and. 
print_timer) then ++ call timer%print("prepare_matrix_analytic") ++ end if ++ call timer%free() ++ end subroutine ++ ++ function check_correctness_analytic_& ++ &MATH_DATATYPE& ++ &_& ++ &PRECISION& ++ &(na, nev, ev, z, nblk, myid, np_rows, np_cols, my_prow, my_pcol, check_all_evals, & ++ check_eigenvectors, print_times) result(status) ++ use precision_for_tests ++ ++ implicit none ++#include "./test_precision_kinds.F90" ++ TEST_INT_TYPE, intent(in) :: na, nev, nblk, myid, np_rows, & ++ np_cols, my_prow, my_pcol ++ TEST_INT_TYPE :: status ++ TEST_INT_MPI_TYPE :: mpierr ++ MATH_DATATYPE(kind=rck), intent(inout) :: z(:,:) ++ real(kind=rk), intent(inout) :: ev(:) ++ logical, intent(in) :: check_all_evals, check_eigenvectors ++ ++ TEST_INT_TYPE :: globI, globJ, locI, locJ, & ++ levels(num_primes) ++ integer(kind=c_int) :: loc_I, loc_J ++ real(kind=rk) :: diff, max_z_diff, max_ev_diff, & ++ glob_max_z_diff, max_curr_z_diff ++#ifdef DOUBLE_PRECISION ++ real(kind=rk), parameter :: tol_eigenvalues = 5e-14_rk8 ++ real(kind=rk), parameter :: tol_eigenvectors = 6e-11_rk8 ++#endif ++#ifdef SINGLE_PRECISION ++ ! tolerance needs to be very high due to qr tests ++ ! it should be distinguished somehow! ++ real(kind=rk), parameter :: tol_eigenvalues = 7e-6_rk4 ++ real(kind=rk), parameter :: tol_eigenvectors = 4e-3_rk4 ++#endif ++ real(kind=rk) :: computed_ev, expected_ev ++ MATH_DATATYPE(kind=rck) :: computed_z, expected_z ++ ++ MATH_DATATYPE(kind=rck) :: max_value_for_normalization, & ++ computed_z_on_max_position, & ++ normalization_quotient ++ MATH_DATATYPE(kind=rck) :: max_values_array(np_rows * np_cols), & ++ corresponding_exact_value ++ integer(kind=c_int) :: max_value_idx, rank_with_max, & ++ rank_with_max_reduced, & ++ num_checked_evals ++ integer(kind=c_int) :: max_idx_array(np_rows * np_cols), & ++ rank ++ logical, optional :: print_times ++ logical :: print_timer ++ ++#ifdef HAVE_DETAILED_TIMINGS ++ type(timer_t) :: timer ++#else ++ type(timer_dummy_t) :: timer ++#endif ++ ++ call timer%enable() ++ call timer%start("check_correctness_analytic") ++ ++ ++ print_timer = .true. ++ if (present(print_times)) then ++ print_timer = print_times ++ endif ++ ++ if(.not. decompose(na, levels)) then ++ print *, "can not decomopse matrix size" ++ stop 1 ++ end if ++ ++ if(check_all_evals) then ++ num_checked_evals = na ++ else ++ num_checked_evals = nev ++ endif ++ !call print_matrix(myid, na, z, "z") ++ max_z_diff = 0.0_rk ++ max_ev_diff = 0.0_rk ++ call timer%start("loop_eigenvalues") ++ do globJ = 1, num_checked_evals ++ computed_ev = ev(globJ) ++ call timer%start("evaluation") ++ expected_ev = analytic_eigenvalues_real_& ++ &PRECISION& ++ &(na, globJ) ++ call timer%stop("evaluation") ++ diff = abs(computed_ev - expected_ev) ++ max_ev_diff = max(diff, max_ev_diff) ++ end do ++ call timer%stop("loop_eigenvalues") ++ ++ call timer%start("loop_eigenvectors") ++ do globJ = 1, nev ++ max_curr_z_diff = 0.0_rk ++ ++ ! eigenvectors are unique up to multiplication by scalar (complex in complex case) ++ ! to be able to compare them with analytic, we have to normalize them somehow ++ ! we will find a value in computed eigenvector with highest absolut value and enforce ++ ! such multiple of computed eigenvector, that the value on corresponding position is the same ++ ! as an corresponding value in the analytical eigenvector ++ ++ ! 
++      ! find the maximal value in the local part of the given eigenvector (with index globJ)
++      max_value_for_normalization = 0.0_rk
++      max_value_idx = -1
++      do globI = 1, na
++        if(map_global_array_index_to_local_index(int(globI,kind=c_int), int(globJ,kind=c_int), loc_I, loc_J, &
++                                                 int(nblk,kind=c_int), int(np_rows,kind=c_int), int(np_cols,kind=c_int), &
++                                                 int(my_prow,kind=c_int), int(my_pcol,kind=c_int) )) then
++          locI = int(loc_I,kind=INT_TYPE)
++          locJ = int(loc_J,kind=INT_TYPE)
++          computed_z = z(locI, locJ)
++          if(abs(computed_z) > abs(max_value_for_normalization)) then
++            max_value_for_normalization = computed_z
++            max_value_idx = int(globI,kind=c_int)
++          end if
++        end if
++      end do
++
++      ! find the global maximum and its position. For technical reasons (we are looking
++      ! for the maximum of a complex number), this is not easy to do with a plain
++      ! reduction. We therefore communicate the local maxima to MPI rank 0 and resolve
++      ! the choice there. A distributed reduction would face a uniqueness problem:
++      ! two completely different complex numbers can have the same absolute value.
++#ifdef WITH_MPI
++      call MPI_Gather(max_value_for_normalization, 1_MPI_KIND, MPI_MATH_DATATYPE_PRECISION, &
++                      max_values_array, 1_MPI_KIND, MPI_MATH_DATATYPE_PRECISION, 0_MPI_KIND, &
++                      int(MPI_COMM_WORLD,kind=MPI_KIND), mpierr)
++      call MPI_Gather(max_value_idx, 1_MPI_KIND, MPI_INT, max_idx_array, 1_MPI_KIND, MPI_INT, &
++                      0_MPI_KIND, int(MPI_COMM_WORLD,kind=MPI_KIND), mpierr)
++      max_value_for_normalization = 0.0_rk
++      max_value_idx = -1
++      do rank = 1, np_cols * np_rows
++        if(abs(max_values_array(rank)) > abs(max_value_for_normalization)) then
++          max_value_for_normalization = max_values_array(rank)
++          max_value_idx = max_idx_array(rank)
++        end if
++      end do
++      call MPI_Bcast(max_value_for_normalization, 1_MPI_KIND, MPI_MATH_DATATYPE_PRECISION, &
++                     0_MPI_KIND, int(MPI_COMM_WORLD,kind=MPI_KIND), mpierr)
++      call MPI_Bcast(max_value_idx, 1_MPI_KIND, MPI_INT, 0_MPI_KIND, &
++                     int(MPI_COMM_WORLD,kind=MPI_KIND), mpierr)
++#endif
++      ! we have decided what the maximum computed value is. Calculate the expected value at the same position
++      if(abs(max_value_for_normalization) < 0.0001_rk) then
++        if(myid == 0) print *, 'Maximal value in eigenvector too small :', max_value_for_normalization
++        status = 1
++        return
++      end if
++      call timer%start("evaluation_helper")
++      corresponding_exact_value = analytic_eigenvectors_&
++      &MATH_DATATYPE&
++      &_&
++      &PRECISION&
++      &(na, int(max_value_idx,kind=INT_TYPE), globJ)
++      call timer%stop("evaluation_helper")
++      normalization_quotient = corresponding_exact_value / max_value_for_normalization
++      ! write(*,*) "normalization q", normalization_quotient
++
++      ! compare computed and expected eigenvector values, taking the normalization quotient into account
++      do globI = 1, na
++        if(map_global_array_index_to_local_index(int(globI,kind=c_int), int(globJ,kind=c_int), loc_I, loc_J, &
++                                                 int(nblk,kind=c_int), int(np_rows,kind=c_int), int(np_cols,kind=c_int), &
++                                                 int(my_prow,kind=c_int), int(my_pcol,kind=c_int) )) then
++          locI = int(loc_I,kind=INT_TYPE)
++          locJ = int(loc_J,kind=INT_TYPE)
++          computed_z = z(locI, locJ)
++          call timer%start("evaluation")
++          expected_z = analytic_eigenvectors_&
++          &MATH_DATATYPE&
++          &_&
++          &PRECISION&
++          &(na, globI, globJ)
++          call timer%stop("evaluation")
++          max_curr_z_diff = max(abs(normalization_quotient * computed_z - expected_z), max_curr_z_diff)
++        end if
++      end do
++      ! 
we have max difference of one of the eigenvectors, update global ++ max_z_diff = max(max_z_diff, max_curr_z_diff) ++ end do !globJ ++ call timer%stop("loop_eigenvectors") ++ ++#ifdef WITH_MPI ++ call mpi_allreduce(max_z_diff, glob_max_z_diff, 1_MPI_KIND, MPI_REAL_PRECISION, MPI_MAX, & ++ int(MPI_COMM_WORLD,kind=MPI_KIND), mpierr) ++#else ++ glob_max_z_diff = max_z_diff ++#endif ++ if(myid == 0) print *, 'Maximum error in eigenvalues :', max_ev_diff ++ if (check_eigenvectors) then ++ if(myid == 0) print *, 'Maximum error in eigenvectors :', glob_max_z_diff ++ endif ++ ++ status = 0 ++ if (nev .gt. 2) then ++ if (max_ev_diff .gt. tol_eigenvalues .or. max_ev_diff .eq. 0.0_rk) status = 1 ++ if (check_eigenvectors) then ++ if (glob_max_z_diff .gt. tol_eigenvectors .or. glob_max_z_diff .eq. 0.0_rk) status = 1 ++ endif ++ else ++ if (max_ev_diff .gt. tol_eigenvalues) status = 1 ++ if (check_eigenvectors) then ++ if (glob_max_z_diff .gt. tol_eigenvectors) status = 1 ++ endif ++ endif ++ ++ call timer%stop("check_correctness_analytic") ++ if(myid == 0 .and. print_timer) then ++ call timer%print("check_correctness_analytic") ++ end if ++ call timer%free() ++ end function ++ ++ ++ function analytic_matrix_& ++ &MATH_DATATYPE& ++ &_& ++ &PRECISION& ++ &(na, i, j) result(element) ++ use precision_for_tests ++ ++ implicit none ++ TEST_INT_TYPE, intent(in) :: na, i, j ++ MATH_DATATYPE(kind=REAL_DATATYPE) :: element ++ ++ element = analytic_& ++ &MATH_DATATYPE& ++ &_& ++ &PRECISION& ++ &(na, i, j, ANALYTIC_MATRIX) ++ ++ end function ++ ++ function analytic_eigenvectors_& ++ &MATH_DATATYPE& ++ &_& ++ &PRECISION& ++ &(na, i, j) result(element) ++ use precision_for_tests ++ ++ implicit none ++ TEST_INT_TYPE, intent(in) :: na, i, j ++ MATH_DATATYPE(kind=REAL_DATATYPE) :: element ++ ++ element = analytic_& ++ &MATH_DATATYPE& ++ &_& ++ &PRECISION& ++ &(na, i, j, ANALYTIC_EIGENVECTORS) ++ ++ end function ++ ++ function analytic_eigenvalues_& ++ &MATH_DATATYPE& ++ &_& ++ &PRECISION& ++ &(na, i) result(element) ++ use precision_for_tests ++ ++ implicit none ++ TEST_INT_TYPE, intent(in) :: na, i ++ real(kind=REAL_DATATYPE) :: element ++ ++ element = analytic_real_& ++ &PRECISION& ++ &(na, i, i, ANALYTIC_EIGENVALUES) ++ ++ end function ++ ++ function analytic_& ++ &MATH_DATATYPE& ++ &_& ++ &PRECISION& ++ &(na, i, j, what) result(element) ++ use precision_for_tests ++ ++ implicit none ++#include "./test_precision_kinds.F90" ++ TEST_INT_TYPE, intent(in) :: na, i, j, what ++ MATH_DATATYPE(kind=rck) :: element, mat2x2(2,2), mat(5,5) ++ real(kind=rk) :: a, am, amp ++ TEST_INT_TYPE :: levels(num_primes) ++ TEST_INT_TYPE :: ii, jj, m, prime_id, prime, total_level, level ++ ++ real(kind=rk), parameter :: s = 0.5_rk ++ real(kind=rk), parameter :: c = 0.86602540378443864679_rk ++ real(kind=rk), parameter :: sq2 = 1.4142135623730950488_rk ++ ++ real(kind=rk), parameter :: largest_ev = 2.0_rk ++ ++ assert(i <= na) ++ assert(j <= na) ++ assert(i >= 0) ++ assert(j >= 0) ++ assert(decompose(na, levels)) ++ ! go to zero-based indexing ++ ii = i - 1 ++ jj = j - 1 ++ if (na .gt. 
2) then ++ a = exp(log(largest_ev)/(na-1)) ++ else ++ a = exp(log(largest_ev)/(1)) ++ endif ++ ++ element = 1.0_rck ++#ifdef COMPLEXCASE ++ element = (1.0_rk, 0.0_rk) ++#endif ++ total_level = 0 ++ am = a ++#ifdef BUILD_FUGAKU ++ primes(1) = 2 ++ primes(2) = 3 ++ primes(3) = 5 ++#endif ++ do prime_id = 1,num_primes ++ prime = primes(prime_id) ++ do level = 1, levels(prime_id) ++ amp = am**(prime-1) ++ total_level = total_level + 1 ++ if(what == ANALYTIC_MATRIX) then ++#ifdef REALCASE ++#ifndef FUGAKU ++ mat2x2 = reshape((/ c*c + amp * s*s, (amp - 1.0_rk) * s*c, & ++ (amp - 1.0_rk) * s*c, s*s + amp * c*c /), & ++ (/2, 2/), order=(/2,1/)) ++#endif ++#endif ++#ifdef COMPLEXCASE ++#ifndef FUGAKU ++ mat2x2 = reshape((/ 0.5_rck * (amp + 1.0_rck) * (1.0_rk, 0.0_rk), sq2/4.0_rk * (amp - 1.0_rk) * (1.0_rk, 1.0_rk), & ++ sq2/4.0_rk * (amp - 1.0_rk) * (1.0_rk, -1.0_rk), 0.5_rck * (amp + 1.0_rck) * (1.0_rk, 0.0_rk) /), & ++ (/2, 2/), order=(/2,1/)) ++! intel 2018 does not reshape correctly (one would have to specify order=(/1,2/) ++! until this is resolved, I resorted to the following ++ mat2x2(1,2) = sq2/4.0_rk * (amp - 1.0_rk) * (1.0_rk, 1.0_rk) ++ mat2x2(2,1) = sq2/4.0_rk * (amp - 1.0_rk) * (1.0_rk, -1.0_rk) ++#endif ++#endif ++ else if(what == ANALYTIC_EIGENVECTORS) then ++#ifdef REALCASE ++#ifndef FUGAKU ++ mat2x2 = reshape((/ c, s, & ++ -s, c /), & ++ (/2, 2/), order=(/2,1/)) ++! intel 2018 does not reshape correctly (one would have to specify order=(/1,2/) ++! until this is resolved, I resorted to the following ++ mat2x2(1,2) = s ++ mat2x2(2,1) = -s ++#endif ++#endif ++#ifdef COMPLEXCASE ++#ifndef FUGAKU ++ mat2x2 = reshape((/ -sq2/2.0_rck * (1.0_rk, 0.0_rk), -sq2/2.0_rck * (1.0_rk, 0.0_rk), & ++ 0.5_rk * (1.0_rk, -1.0_rk), 0.5_rk * (-1.0_rk, 1.0_rk) /), & ++ (/2, 2/), order=(/2,1/)) ++! intel 2018 does not reshape correctly (one would have to specify order=(/1,2/) ++! until this is resolved, I resorted to the following ++ mat2x2(1,2) = -sq2/2.0_rck * (1.0_rk, 0.0_rk) ++ mat2x2(2,1) = 0.5_rk * (1.0_rk, -1.0_rk) ++#endif ++#endif ++ else if(what == ANALYTIC_EIGENVALUES) then ++#ifndef FUGAKU ++ mat2x2 = reshape((/ 1.0_rck, 0.0_rck, & ++ 0.0_rck, amp /), & ++ (/2, 2/), order=(/2,1/)) ++#endif ++ else ++ assert(.false.) ++ end if ++ ++ mat = 0.0_rck ++ if(prime == 2) then ++#ifndef BUILD_FUGAKU ++ mat(1:2, 1:2) = mat2x2 ++#endif ++ else if(prime == 3) then ++#ifndef BUILD_FUGAKU ++ mat((/1,3/),(/1,3/)) = mat2x2 ++#endif ++ if(what == ANALYTIC_EIGENVECTORS) then ++ mat(2,2) = 1.0_rck ++ else ++ mat(2,2) = am ++ end if ++ else if(prime == 5) then ++#ifndef BUILD_FUGAKU ++ mat((/1,5/),(/1,5/)) = mat2x2 ++#endif ++ if(what == ANALYTIC_EIGENVECTORS) then ++ mat(2,2) = 1.0_rck ++ mat(3,3) = 1.0_rck ++ mat(4,4) = 1.0_rck ++ else ++ mat(2,2) = am ++ mat(3,3) = am**2 ++ mat(4,4) = am**3 ++ end if ++ else ++ assert(.false.) ++ end if ++ ++ ! write(*,*) "calc value, elem: ", element, ", mat: ", mod(ii,2), mod(jj,2), mat(mod(ii,2), mod(jj,2)), "am ", am ++ ! 
write(*,*) " matrix mat", mat ++ element = element * mat(mod(ii,prime) + 1, mod(jj,prime) + 1) ++ ii = ii / prime ++ jj = jj / prime ++ ++ am = am**prime ++ end do ++ end do ++ !write(*,*) "returning value ", element ++ end function ++ ++ ++ subroutine print_matrix_& ++ &MATH_DATATYPE& ++ &_& ++ &PRECISION& ++ &(myid, na, mat, mat_name) ++ use precision_for_tests ++ ++ implicit none ++#include "./test_precision_kinds.F90" ++ TEST_INT_TYPE, intent(in) :: myid, na ++ character(len=*), intent(in) :: mat_name ++ MATH_DATATYPE(kind=rck) :: mat(na, na) ++ TEST_INT_TYPE :: i,j ++ character(len=20) :: na_str ++ ++ if(myid .ne. 0) & ++ return ++ write(*,*) "Matrix: "//trim(mat_name) ++ write(na_str, *) na ++ do i = 1, na ++#ifdef REALCASE ++ write(*, '('//trim(na_str)//'f8.3)') mat(i, :) ++#endif ++#ifdef COMPLEXCASE ++ write(*,'('//trim(na_str)//'(A,f8.3,A,f8.3,A))') ('(', real(mat(i,j)), ',', aimag(mat(i,j)), ')', j=1,na) ++#endif ++ end do ++ write(*,*) ++ end subroutine ++ ++ ++ subroutine check_matrices_& ++ &MATH_DATATYPE& ++ &_& ++ &PRECISION& ++ &(myid, na) ++ use precision_for_tests ++ ++ implicit none ++#include "./test_precision_kinds.F90" ++ TEST_INT_TYPE, intent(in) :: myid, na ++ MATH_DATATYPE(kind=rck) :: A(na, na), S(na, na), L(na, na), res(na, na) ++ TEST_INT_TYPE :: i, j, decomposition(num_primes) ++ ++ real(kind=rk) :: err ++#ifdef DOUBLE_PRECISION ++ real(kind=rk), parameter :: TOL = 1e-8 ++#endif ++#ifdef SINGLE_PRECISION ++ real(kind=rk), parameter :: TOL = 1e-4 ++#endif ++ ++ assert(decompose(na, decomposition)) ++ ++ do i = 1, na ++ do j = 1, na ++ A(i,j) = analytic_matrix_& ++ &MATH_DATATYPE& ++ &_& ++ &PRECISION& ++ &(na, i, j) ++ S(i,j) = analytic_eigenvectors_& ++ &MATH_DATATYPE& ++ &_& ++ &PRECISION& ++ &(na, i, j) ++ L(i,j) = analytic_& ++ &MATH_DATATYPE& ++ &_& ++ &PRECISION& ++ &(na, i, j, ANALYTIC_EIGENVALUES) ++ end do ++ end do ++ ++ res = matmul(A,S) - matmul(S,L) ++ err = maxval(abs(res)) ++ ++ if(err > TOL) then ++ print *, "WARNING: sanity test in module analytic failed, error is ", err ++ end if ++ ++ if(.false.) then ++ !if(na == 2 .or. na == 5) then ++ call print_matrix(myid, na, A, "A") ++ call print_matrix(myid, na, S, "S") ++ call print_matrix(myid, na, L, "L") ++ ++ call print_matrix(myid, na, matmul(A,S), "AS") ++ call print_matrix(myid, na, matmul(S,L), "SL") ++ ++ call print_matrix(myid, na, res , "res") ++ end if ++ ++ end subroutine ++ ++ subroutine check_module_sanity_& ++ &MATH_DATATYPE& ++ &_& ++ &PRECISION& ++ &(myid) ++ use precision_for_tests ++ ++ implicit none ++ TEST_INT_TYPE, intent(in) :: myid ++ TEST_INT_TYPE :: decomposition(num_primes), i ++#ifndef BUILD_FUGAKU ++ TEST_INT_TYPE, parameter :: check_sizes(7) = (/2, 3, 5, 6, 10, 25, 150/) ++#else ++ TEST_INT_TYPE :: check_sizes(7) ++#endif ++ if(myid == 0) print *, "Checking test_analytic module sanity.... 
" ++#ifndef BUILD_FUGAKU ++#ifdef HAVE_64BIT_INTEGER_MATH_SUPPORT ++ assert(decompose(1500_lik, decomposition)) ++#else ++ assert(decompose(1500_ik, decomposition)) ++#endif ++ assert(all(decomposition == (/2,1,3/))) ++#ifdef HAVE_64BIT_INTEGER_MATH_SUPPORT ++ assert(decompose(6_lik,decomposition)) ++#else ++ assert(decompose(6_ik,decomposition)) ++#endif ++ assert(all(decomposition == (/1,1,0/))) ++ ++#ifdef BUILD_FUGAKU ++ check_sizes(1) = 2 ++ check_sizes(2) = 3 ++ check_sizes(3) = 5 ++ check_sizes(4) = 10 ++ check_sizes(5) = 25 ++ check_sizes(6) = 150 ++#endif ++ do i =1, size(check_sizes) ++ call check_matrices_& ++ &MATH_DATATYPE& ++ &_& ++ &PRECISION& ++ &(myid, check_sizes(i)) ++ end do ++ ++ if(myid == 0) print *, "Checking test_analytic module sanity.... DONE" ++#endif ++ end subroutine +diff -ruN elpa-new_release_2021.11.001/examples/shared/test_blacs_infrastructure.F90 elpa-new_release_2021.11.001_ok/examples/shared/test_blacs_infrastructure.F90 +--- elpa-new_release_2021.11.001/examples/shared/test_blacs_infrastructure.F90 1970-01-01 01:00:00.000000000 +0100 ++++ elpa-new_release_2021.11.001_ok/examples/shared/test_blacs_infrastructure.F90 2022-01-26 10:10:58.368066000 +0100 +@@ -0,0 +1,208 @@ ++! This file is part of ELPA. ++! ++! The ELPA library was originally created by the ELPA consortium, ++! consisting of the following organizations: ++! ++! - Max Planck Computing and Data Facility (MPCDF), formerly known as ++! Rechenzentrum Garching der Max-Planck-Gesellschaft (RZG), ++! - Bergische Universität Wuppertal, Lehrstuhl für angewandte ++! Informatik, ++! - Technische Universität München, Lehrstuhl für Informatik mit ++! Schwerpunkt Wissenschaftliches Rechnen , ++! - Fritz-Haber-Institut, Berlin, Abt. Theorie, ++! - Max-Plack-Institut für Mathematik in den Naturwissenschaften, ++! Leipzig, Abt. Komplexe Strukutren in Biologie und Kognition, ++! and ++! - IBM Deutschland GmbH ++! ++! ++! More information can be found here: ++! http://elpa.mpcdf.mpg.de/ ++! ++! ELPA is free software: you can redistribute it and/or modify ++! it under the terms of the version 3 of the license of the ++! GNU Lesser General Public License as published by the Free ++! Software Foundation. ++! ++! ELPA is distributed in the hope that it will be useful, ++! but WITHOUT ANY WARRANTY; without even the implied warranty of ++! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++! GNU Lesser General Public License for more details. ++! ++! You should have received a copy of the GNU Lesser General Public License ++! along with ELPA. If not, see <http://www.gnu.org/licenses/> ++! ++! ELPA reflects a substantial effort on the part of the original ++! ELPA consortium, and we ask you to respect the spirit of the ++! license that we chose: i.e., please contribute any changes you ++! may have back to the original ELPA library distribution, and keep ++! any derivatives of ELPA under the same license that we chose for ++! the original distribution, the GNU Lesser General Public License. ++! ++! 
++#include "config-f90.h" ++ ++#ifdef HAVE_64BIT_INTEGER_MATH_SUPPORT ++#define TEST_INT_TYPE integer(kind=c_int64_t) ++#define INT_TYPE c_int64_t ++#define TEST_C_INT_TYPE_PTR long int* ++#define TEST_C_INT_TYPE long int ++#else ++#define TEST_INT_TYPE integer(kind=c_int32_t) ++#define INT_TYPE c_int32_t ++#define TEST_C_INT_TYPE_PTR int* ++#define TEST_C_INT_TYPE int ++#endif ++#ifdef HAVE_64BIT_INTEGER_MPI_SUPPORT ++#define TEST_INT_MPI_TYPE integer(kind=c_int64_t) ++#define INT_MPI_TYPE c_int64_t ++#define TEST_C_INT_MPI_TYPE_PTR long int* ++#define TEST_C_INT_MPI_TYPE long int ++#else ++#define TEST_INT_MPI_TYPE integer(kind=c_int32_t) ++#define INT_MPI_TYPE c_int32_t ++#define TEST_C_INT_MPI_TYPE_PTR int* ++#define TEST_C_INT_MPI_TYPE int ++#endif ++ ++module test_blacs_infrastructure ++ ++ contains ++ ++ !c> void set_up_blacsgrid_f(TEST_C_INT_TYPE mpi_comm_parent, TEST_C_INT_TYPE np_rows, ++ !c> TEST_C_INT_TYPE np_cols, char layout, ++ !c> TEST_C_INT_TYPE_PTR my_blacs_ctxt, TEST_C_INT_TYPE_PTR my_prow, ++ !c> TEST_C_INT_TYPE_PTR my_pcol); ++ subroutine set_up_blacsgrid(mpi_comm_parent, np_rows, np_cols, layout, & ++ my_blacs_ctxt, my_prow, my_pcol) bind(C, name="set_up_blacsgrid_f") ++ ++ use precision_for_tests ++ use test_util ++ use iso_c_binding ++ ++ implicit none ++ TEST_INT_TYPE, intent(in), value :: mpi_comm_parent, np_rows, np_cols ++#ifdef SXAURORA ++ character(len=1), intent(in) :: layout ++#else ++ character(kind=c_char), intent(in), value :: layout ++#endif ++ TEST_INT_TYPE, intent(out) :: my_blacs_ctxt, my_prow, my_pcol ++ ++#ifdef WITH_MPI ++ TEST_INT_TYPE :: np_rows_, np_cols_ ++#endif ++ ++ if (layout /= 'R' .and. layout /= 'C') then ++ print *, "layout must be 'R' or 'C'" ++ stop 1 ++ end if ++ ++ my_blacs_ctxt = mpi_comm_parent ++#ifdef WITH_MPI ++ call BLACS_Gridinit(my_blacs_ctxt, layout, np_rows, np_cols) ++ call BLACS_Gridinfo(my_blacs_ctxt, np_rows_, np_cols_, my_prow, my_pcol) ++ if (np_rows /= np_rows_) then ++ print *, "BLACS_Gridinfo returned different values for np_rows as set by BLACS_Gridinit" ++ stop 1 ++ endif ++ if (np_cols /= np_cols_) then ++ print *, "BLACS_Gridinfo returned different values for np_cols as set by BLACS_Gridinit" ++ stop 1 ++ endif ++#else ++ my_prow = 0 ++ my_pcol = 0 ++#endif ++ end subroutine ++ ++ subroutine set_up_blacs_descriptor(na, nblk, my_prow, my_pcol, & ++ np_rows, np_cols, na_rows, & ++ na_cols, sc_desc, my_blacs_ctxt, info) ++ ++ use elpa_utilities, only : error_unit ++ use test_util ++ use precision_for_tests ++ use tests_scalapack_interfaces ++ implicit none ++ ++ TEST_INT_TYPE, intent(in) :: na, nblk, my_prow, my_pcol, np_rows, & ++ np_cols, & ++ my_blacs_ctxt ++ TEST_INT_TYPE, intent(inout) :: info ++ TEST_INT_TYPE, intent(out) :: na_rows, na_cols, sc_desc(1:9) ++ ++#ifdef WITH_MPI ++ TEST_INT_MPI_TYPE :: mpierr ++ ++ sc_desc(:) = 0 ++ ! determine the neccessary size of the distributed matrices, ++ ! we use the scalapack tools routine NUMROC ++ ++ na_rows = numroc(na, nblk, my_prow, 0_BLAS_KIND, np_rows) ++ na_cols = numroc(na, nblk, my_pcol, 0_BLAS_KIND, np_cols) ++ ++ ! set up the scalapack descriptor for the checks below ++ ! For ELPA the following restrictions hold: ++ ! - block sizes in both directions must be identical (args 4 a. 5) ++ ! - first row and column of the distributed matrix must be on ++ ! row/col 0/0 (arg 6 and 7) ++ ++ call descinit(sc_desc, na, na, nblk, nblk, 0_BLAS_KIND, 0_BLAS_KIND, & ++ my_blacs_ctxt, na_rows, info) ++ ++ if (info .ne. 
0) then
++      write(error_unit,*) 'Error in BLACS descinit! info=',info
++      write(error_unit,*) 'Most likely this happened since you want to use'
++      write(error_unit,*) 'more MPI tasks than are possible for your'
++      write(error_unit,*) 'problem size (matrix size and blocksize)!'
++      write(error_unit,*) 'The blacsgrid cannot be set up properly'
++      write(error_unit,*) 'Try reducing the number of MPI tasks...'
++      call MPI_ABORT(int(mpi_comm_world,kind=MPI_KIND), 1_MPI_KIND, mpierr)
++    endif
++#else /* WITH_MPI */
++    na_rows = na
++    na_cols = na
++#endif /* WITH_MPI */
++
++  end subroutine
++
++  !c> void set_up_blacs_descriptor_f(TEST_C_INT_TYPE na, TEST_C_INT_TYPE nblk,
++  !c>                                TEST_C_INT_TYPE my_prow, TEST_C_INT_TYPE my_pcol,
++  !c>                                TEST_C_INT_TYPE np_rows, TEST_C_INT_TYPE np_cols,
++  !c>                                TEST_C_INT_TYPE_PTR na_rows, TEST_C_INT_TYPE_PTR na_cols,
++  !c>                                TEST_C_INT_TYPE sc_desc[9],
++  !c>                                TEST_C_INT_TYPE my_blacs_ctxt,
++  !c>                                TEST_C_INT_TYPE_PTR info);
++  subroutine set_up_blacs_descriptor_f(na, nblk, my_prow, my_pcol, &
++                                       np_rows, np_cols, na_rows,  &
++                                       na_cols, sc_desc,           &
++                                       my_blacs_ctxt, info)        &
++                                       bind(C, name="set_up_blacs_descriptor_f")
++
++    use iso_c_binding
++    implicit none
++
++
++    TEST_INT_TYPE, value :: na, nblk, my_prow, my_pcol, np_rows, &
++                            np_cols, my_blacs_ctxt
++    TEST_INT_TYPE        :: na_rows, na_cols, info, sc_desc(1:9)
++
++    call set_up_blacs_descriptor(na, nblk, my_prow, my_pcol, &
++                                 np_rows, np_cols, na_rows,  &
++                                 na_cols, sc_desc, my_blacs_ctxt, info)
++
++
++  end subroutine
++
++
++  function index_l2g(idx_loc, nblk, iproc, nprocs) result(indexl2g)
++    use precision_for_tests
++    implicit none
++    TEST_INT_TYPE :: indexl2g
++    TEST_INT_TYPE :: idx_loc, nblk, iproc, nprocs
++    indexl2g = nprocs * nblk * ((idx_loc-1) / nblk) + mod(idx_loc-1,nblk) + mod(nprocs+iproc, nprocs)*nblk + 1
++    return
++  end function
++
++end module
+diff -ruN elpa-new_release_2021.11.001/examples/shared/test_check_correctness.F90 elpa-new_release_2021.11.001_ok/examples/shared/test_check_correctness.F90
+--- elpa-new_release_2021.11.001/examples/shared/test_check_correctness.F90 1970-01-01 01:00:00.000000000 +0100
++++ elpa-new_release_2021.11.001_ok/examples/shared/test_check_correctness.F90 2022-01-26 10:10:58.369246000 +0100
+@@ -0,0 +1,156 @@
++! This file is part of ELPA.
++!
++! The ELPA library was originally created by the ELPA consortium,
++! consisting of the following organizations:
++!
++! - Max Planck Computing and Data Facility (MPCDF), formerly known as
++!   Rechenzentrum Garching der Max-Planck-Gesellschaft (RZG),
++! - Bergische Universität Wuppertal, Lehrstuhl für angewandte
++!   Informatik,
++! - Technische Universität München, Lehrstuhl für Informatik mit
++!   Schwerpunkt Wissenschaftliches Rechnen ,
++! - Fritz-Haber-Institut, Berlin, Abt. Theorie,
++! - Max-Plack-Institut für Mathematik in den Naturwissenschaften,
++!   Leipzig, Abt. Komplexe Strukutren in Biologie und Kognition,
++!   and
++! - IBM Deutschland GmbH
++!
++!
++! More information can be found here:
++! http://elpa.mpcdf.mpg.de/
++!
++! ELPA is free software: you can redistribute it and/or modify
++! it under the terms of the version 3 of the license of the
++! GNU Lesser General Public License as published by the Free
++! Software Foundation.
++!
++! ELPA is distributed in the hope that it will be useful,
++! but WITHOUT ANY WARRANTY; without even the implied warranty of
++! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
++! GNU Lesser General Public License for more details.
++!
++! 
You should have received a copy of the GNU Lesser General Public License ++! along with ELPA. If not, see <http://www.gnu.org/licenses/> ++! ++! ELPA reflects a substantial effort on the part of the original ++! ELPA consortium, and we ask you to respect the spirit of the ++! license that we chose: i.e., please contribute any changes you ++! may have back to the original ELPA library distribution, and keep ++! any derivatives of ELPA under the same license that we chose for ++! the original distribution, the GNU Lesser General Public License. ++! ++! Author: A. Marek, MPCDF ++#include "config-f90.h" ++ ++module test_check_correctness ++ use test_util ++ ++ interface check_correctness_evp_numeric_residuals ++ module procedure check_correctness_evp_numeric_residuals_complex_double ++ module procedure check_correctness_evp_numeric_residuals_real_double ++#ifdef WANT_SINGLE_PRECISION_REAL ++ module procedure check_correctness_evp_numeric_residuals_real_single ++#endif ++#ifdef WANT_SINGLE_PRECISION_COMPLEX ++ module procedure check_correctness_evp_numeric_residuals_complex_single ++#endif ++ end interface ++ ++ interface check_correctness_evp_numeric_residuals_ss ++! module procedure check_correctness_evp_numeric_residuals_ss_complex_double ++ module procedure check_correctness_evp_numeric_residuals_ss_real_double ++#ifdef WANT_SINGLE_PRECISION_REAL ++ module procedure check_correctness_evp_numeric_residuals_ss_real_single ++#endif ++! #ifdef WANT_SINGLE_PRECISION_COMPLEX ++! module procedure check_correctness_evp_numeric_residuals_ss_complex_single ++! #endif ++ end interface ++ ++ interface check_correctness_eigenvalues_toeplitz ++ module procedure check_correctness_eigenvalues_toeplitz_complex_double ++ module procedure check_correctness_eigenvalues_toeplitz_real_double ++#ifdef WANT_SINGLE_PRECISION_REAL ++ module procedure check_correctness_eigenvalues_toeplitz_real_single ++#endif ++#ifdef WANT_SINGLE_PRECISION_COMPLEX ++ module procedure check_correctness_eigenvalues_toeplitz_complex_single ++#endif ++ end interface ++ ++ interface check_correctness_eigenvalues_frank ++ module procedure check_correctness_eigenvalues_frank_complex_double ++ module procedure check_correctness_eigenvalues_frank_real_double ++#ifdef WANT_SINGLE_PRECISION_REAL ++ module procedure check_correctness_eigenvalues_frank_real_single ++#endif ++#ifdef WANT_SINGLE_PRECISION_COMPLEX ++ module procedure check_correctness_eigenvalues_frank_complex_single ++#endif ++ end interface ++ ++ interface check_correctness_cholesky ++ module procedure check_correctness_cholesky_complex_double ++ module procedure check_correctness_cholesky_real_double ++#ifdef WANT_SINGLE_PRECISION_REAL ++ module procedure check_correctness_cholesky_real_single ++#endif ++#ifdef WANT_SINGLE_PRECISION_COMPLEX ++ module procedure check_correctness_cholesky_complex_single ++#endif ++ end interface ++ ++ interface check_correctness_hermitian_multiply ++ module procedure check_correctness_hermitian_multiply_complex_double ++ module procedure check_correctness_hermitian_multiply_real_double ++#ifdef WANT_SINGLE_PRECISION_REAL ++ module procedure check_correctness_hermitian_multiply_real_single ++#endif ++#ifdef WANT_SINGLE_PRECISION_COMPLEX ++ module procedure check_correctness_hermitian_multiply_complex_single ++#endif ++ end interface ++ ++ ++ contains ++ ++#define COMPLEXCASE 1 ++#define DOUBLE_PRECISION 1 ++#include "../../src/general/precision_macros.h" ++#include "test_check_correctness_template.F90" ++#undef DOUBLE_PRECISION ++#undef COMPLEXCASE 
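++! Each numeric variant of the correctness checks in this module is generated
++! from a single template: the case/precision macros select the datatype, the
++! template is included, and the macros are undefined again. For instance, the
++! single-precision real variant (guarded by WANT_SINGLE_PRECISION_REAL further
++! down) is instantiated as:
++!
++!   #define REALCASE 1
++!   #define SINGLE_PRECISION 1
++!   #include "../../src/general/precision_macros.h"
++!   #include "test_check_correctness_template.F90"
++!   #undef SINGLE_PRECISION
++!   #undef REALCASE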
++ ++#ifdef WANT_SINGLE_PRECISION_COMPLEX ++ ++#define COMPLEXCASE 1 ++#define SINGLE_PRECISION 1 ++#include "../../src/general/precision_macros.h" ++#include "test_check_correctness_template.F90" ++#undef SINGLE_PRECISION ++#undef COMPLEXCASE ++#endif /* WANT_SINGLE_PRECISION_COMPLEX */ ++ ++#define REALCASE 1 ++#define DOUBLE_PRECISION 1 ++#include "../../src/general/precision_macros.h" ++#include "test_check_correctness_template.F90" ++#undef DOUBLE_PRECISION ++#undef REALCASE ++ ++#ifdef WANT_SINGLE_PRECISION_REAL ++ ++#define REALCASE 1 ++#define SINGLE_PRECISION 1 ++#include "../../src/general/precision_macros.h" ++#include "test_check_correctness_template.F90" ++#undef SINGLE_PRECISION ++#undef REALCASE ++ ++ ++#endif /* WANT_SINGLE_PRECISION_REAL */ ++ ++#include "../../src/general/prow_pcol.F90" ++#include "../../src/general/map_global_to_local.F90" ++ ++end module +diff -ruN elpa-new_release_2021.11.001/examples/shared/test_check_correctness_template.F90 elpa-new_release_2021.11.001_ok/examples/shared/test_check_correctness_template.F90 +--- elpa-new_release_2021.11.001/examples/shared/test_check_correctness_template.F90 1970-01-01 01:00:00.000000000 +0100 ++++ elpa-new_release_2021.11.001_ok/examples/shared/test_check_correctness_template.F90 2022-01-26 10:10:58.370443000 +0100 +@@ -0,0 +1,1134 @@ ++! This file is part of ELPA. ++! ++! The ELPA library was originally created by the ELPA consortium, ++! consisting of the following organizations: ++! ++! - Max Planck Computing and Data Facility (MPCDF), formerly known as ++! Rechenzentrum Garching der Max-Planck-Gesellschaft (RZG), ++! - Bergische Universität Wuppertal, Lehrstuhl für angewandte ++! Informatik, ++! - Technische Universität München, Lehrstuhl für Informatik mit ++! Schwerpunkt Wissenschaftliches Rechnen , ++! - Fritz-Haber-Institut, Berlin, Abt. Theorie, ++! - Max-Plack-Institut für Mathematik in den Naturwissenschaften, ++! Leipzig, Abt. Komplexe Strukutren in Biologie und Kognition, ++! and ++! - IBM Deutschland GmbH ++! ++! ++! More information can be found here: ++! http://elpa.mpcdf.mpg.de/ ++! ++! ELPA is free software: you can redistribute it and/or modify ++! it under the terms of the version 3 of the license of the ++! GNU Lesser General Public License as published by the Free ++! Software Foundation. ++! ++! ELPA is distributed in the hope that it will be useful, ++! but WITHOUT ANY WARRANTY; without even the implied warranty of ++! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++! GNU Lesser General Public License for more details. ++! ++! You should have received a copy of the GNU Lesser General Public License ++! along with ELPA. If not, see <http://www.gnu.org/licenses/> ++! ++! ELPA reflects a substantial effort on the part of the original ++! ELPA consortium, and we ask you to respect the spirit of the ++! license that we chose: i.e., please contribute any changes you ++! may have back to the original ELPA library distribution, and keep ++! any derivatives of ELPA under the same license that we chose for ++! the original distribution, the GNU Lesser General Public License. ++! ++! Author: A. 
Marek, MPCDF ++ ++ ++#include "config-f90.h" ++ ++#ifdef HAVE_64BIT_INTEGER_MATH_SUPPORT ++#define TEST_INT_TYPE integer(kind=c_int64_t) ++#define INT_TYPE lik ++#define TEST_C_INT_TYPE_PTR long int* ++#define TEST_C_INT_TYPE long int ++#else ++#define TEST_INT_TYPE integer(kind=c_int32_t) ++#define INT_TYPE ik ++#define TEST_C_INT_TYPE_PTR int* ++#define TEST_C_INT_TYPE int ++#endif ++ ++#ifdef HAVE_64BIT_INTEGER_MPI_SUPPORT ++#define TEST_INT_MPI_TYPE integer(kind=c_int64_t) ++#define INT_MPI_TYPE lik ++#define TEST_C_INT_MPI_TYPE_PTR long int* ++#define TEST_C_INT_MPI_TYPE long int ++#else ++#define TEST_INT_MPI_TYPE integer(kind=c_int32_t) ++#define INT_MPI_TYPE ik ++#define TEST_C_INT_MPI_TYPE_PTR int* ++#define TEST_C_INT_MPI_TYPE int ++#endif ++ ++#if REALCASE == 1 ++ function check_correctness_evp_numeric_residuals_ss_real_& ++ &PRECISION& ++ & (na, nev, as, z, ev, sc_desc, nblk, myid, np_rows, np_cols, my_prow, my_pcol) result(status) ++ use tests_blas_interfaces ++ use tests_scalapack_interfaces ++ use precision_for_tests ++ use iso_c_binding ++ implicit none ++#include "../../src/general/precision_kinds.F90" ++ integer(kind=BLAS_KIND) :: status, na_cols, na_rows ++ integer(kind=BLAS_KIND), intent(in) :: na, nev, nblk, myid, np_rows, np_cols, my_prow, my_pcol ++ real(kind=rk), intent(in) :: as(:,:) ++ real(kind=rk) :: tmpr ++ complex(kind=rck), intent(in) :: z(:,:) ++ real(kind=rk) :: ev(:) ++ complex(kind=rck), dimension(size(as,dim=1),size(as,dim=2)) :: tmp1, tmp2 ++ complex(kind=rck) :: xc ++ ++ complex(kind=rck), allocatable :: as_complex(:,:) ++ ++ integer(kind=BLAS_KIND) :: sc_desc(:) ++ ++ integer(kind=BLAS_KIND) :: i, j, rowLocal, colLocal ++ integer(kind=c_int) :: row_Local, col_Local ++ real(kind=rck) :: err, errmax ++ ++ integer :: mpierr ++ ++ ! tolerance for the residual test for different math type/precision setups ++ real(kind=rk), parameter :: tol_res_real_double = 5e-4_rk ++ real(kind=rk), parameter :: tol_res_real_single = 3e-2_rk ++ real(kind=rk), parameter :: tol_res_complex_double = 5e-12_rk ++ real(kind=rk), parameter :: tol_res_complex_single = 3e-2_rk ++ real(kind=rk) :: tol_res = tol_res_& ++ &MATH_DATATYPE& ++ &_& ++ &PRECISION ++ ! precision of generalized problem is lower ++ real(kind=rk), parameter :: generalized_penalty = 10.0_rk ++ ++ ! tolerance for the orthogonality test for different math type/precision setups ++! real(kind=rk), parameter :: tol_orth_real_double = 5e-11_rk ++ real(kind=rk), parameter :: tol_orth_real_double = 5e-4_rk ++ real(kind=rk), parameter :: tol_orth_real_single = 9e-2_rk ++ real(kind=rk), parameter :: tol_orth_complex_double = 5e-11_rk ++ real(kind=rk), parameter :: tol_orth_complex_single = 9e-3_rk ++ real(kind=rk), parameter :: tol_orth = tol_orth_& ++ &MATH_DATATYPE& ++ &_& ++ &PRECISION ++ ++ complex(kind=rck), parameter :: CZERO = (0.0_rck,0.0_rck), CONE = (1.0_rck,0.0_rck) ++ ++ ++ status = 0 ++ ! Setup complex matrices and eigenvalues ++ na_rows = size(as,dim=1) ++ na_cols = size(as,dim=2) ++ ++ allocate(as_complex(na_rows,na_cols)) ++ do j=1, na_cols ++ do i=1,na_rows ++#ifdef DOUBLE_PRECISION_REAL ++ as_complex(i,j) = dcmplx(as(i,j),0.0_rk) ++#else ++ as_complex(i,j) = cmplx(as(i,j),0.0_rk) ++#endif ++ enddo ++ enddo ++ ++ ! 1. Residual (maximum of || A*Zi - Zi*EVi ||) ++ ++ ! 
tmp1 = Zi*EVi ++ tmp1(:,:) = z(:,:) ++ do i=1,nev ++#ifdef DOUBLE_PRECISION_REAL ++ xc = dcmplx(0.0_rk,ev(i)) ++#else ++ xc = cmplx(0.0_rk,ev(i)) ++#endif ++#ifdef WITH_MPI ++#ifdef DOUBLE_PRECISION_REAL ++ call pzscal(int(na,kind=BLAS_KIND), xc, tmp1, 1_BLAS_KIND, int(i,kind=BLAS_KIND), sc_desc, 1_BLAS_KIND) ++#else ++ call pcscal(int(na,kind=BLAS_KIND), xc, tmp1, 1_BLAS_KIND, int(i,kind=BLAS_KIND), sc_desc, 1_BLAS_KIND) ++#endif ++#else /* WITH_MPI */ ++#ifdef DOUBLE_PRECISION_REAL ++ call zscal(int(na,kind=BLAS_KIND), xc, tmp1(:,i), 1_BLAS_KIND) ++#else ++ call cscal(int(na,kind=BLAS_KIND), xc, tmp1(:,i), 1_BLAS_KIND) ++#endif ++#endif /* WITH_MPI */ ++ enddo ++ ++ ! normal eigenvalue problem .. no need to multiply ++ tmp2(:,:) = tmp1(:,:) ++ ++ ! tmp1 = A * Z ++ ! as is original stored matrix, Z are the EVs ++#ifdef WITH_MPI ++#ifdef DOUBLE_PRECISION_REAL ++ call PZGEMM('N', 'N', int(na,kind=BLAS_KIND), int(nev,kind=BLAS_KIND), int(na,kind=BLAS_KIND), & ++ CONE, as_complex, 1_BLAS_KIND, 1_BLAS_KIND, sc_desc, & ++ z, 1_BLAS_KIND, 1_BLAS_KIND, sc_desc, CZERO, tmp1, 1_BLAS_KIND, 1_BLAS_KIND, sc_desc) ++#else ++ call PCGEMM('N', 'N', int(na,kind=BLAS_KIND), int(nev,kind=BLAS_KIND), int(na,kind=BLAS_KIND), & ++ CONE, as_complex, 1_BLAS_KIND, 1_BLAS_KIND, sc_desc, & ++ z, 1_BLAS_KIND, 1_BLAS_KIND, sc_desc, CZERO, tmp1, 1_BLAS_KIND, 1_BLAS_KIND, sc_desc) ++#endif ++#else /* WITH_MPI */ ++#ifdef DOUBLE_PRECISION_REAL ++ call ZGEMM('N','N',int(na,kind=BLAS_KIND), int(nev,kind=BLAS_KIND), int(na,kind=BLAS_KIND), CONE, & ++ as_complex, int(na,kind=BLAS_KIND), z,int(na,kind=BLAS_KIND), CZERO, tmp1, int(na,kind=BLAS_KIND) ) ++#else ++ call CGEMM('N','N', int(na,kind=BLAS_KIND), int(nev,kind=BLAS_KIND), int(na,kind=BLAS_KIND), CONE, & ++ as_complex, int(na,kind=BLAS_KIND), z, int(na,kind=BLAS_KIND), CZERO, tmp1, int(na,kind=BLAS_KIND) ) ++#endif ++#endif /* WITH_MPI */ ++ ++ ! tmp1 = A*Zi - Zi*EVi ++ tmp1(:,:) = tmp1(:,:) - tmp2(:,:) ++ ++ ! Get maximum norm of columns of tmp1 ++ errmax = 0.0_rk ++ ++ do i=1,nev ++ xc = (0.0_rk,0.0_rk) ++#ifdef WITH_MPI ++#ifdef DOUBLE_PRECISION_REAL ++ call PZDOTC(int(na,kind=BLAS_KIND), xc, tmp1, 1_BLAS_KIND, int(i,kind=BLAS_KIND), sc_desc, & ++ 1_BLAS_KIND, tmp1, 1_BLAS_KIND, int(i,kind=BLAS_KIND), sc_desc, 1_BLAS_KIND) ++#else ++ call PCDOTC(int(na,kind=BLAS_KIND), xc, tmp1, 1_BLAS_KIND, int(i,kind=BLAS_KIND), sc_desc, & ++ 1_BLAS_KIND, tmp1, 1_BLAS_KIND, int(i,kind=BLAS_KIND), sc_desc, 1_BLAS_KIND) ++#endif ++#else /* WITH_MPI */ ++#ifdef DOUBLE_PRECISION_REAL ++ xc = ZDOTC(int(na,kind=BLAS_KIND) ,tmp1, 1_BLAS_KIND, tmp1, 1_BLAS_KIND) ++#else ++ xc = CDOTC(int(na,kind=BLAS_KIND) ,tmp1, 1_BLAS_KIND, tmp1, 1_BLAS_KIND) ++#endif ++#endif /* WITH_MPI */ ++ errmax = max(errmax, sqrt(real(xc,kind=REAL_DATATYPE))) ++ enddo ++ ++ ! Get maximum error norm over all processors ++ err = errmax ++#ifdef WITH_MPI ++ call mpi_allreduce(err, errmax, 1_MPI_KIND, MPI_REAL_PRECISION, MPI_MAX, int(MPI_COMM_WORLD,kind=MPI_KIND), mpierr) ++#else /* WITH_MPI */ ++ errmax = err ++#endif /* WITH_MPI */ ++ if (myid==0) print *,'%Results of numerical residual checks, using complex arithmetic:' ++ if (myid==0) print *,'%Error Residual :',errmax ++ if (nev .ge. 2) then ++ if (errmax .gt. tol_res .or. errmax .eq. 0.0_rk) then ++ status = 1 ++ endif ++ else ++ if (errmax .gt. tol_res) then ++ status = 1 ++ endif ++ endif ++ ++ ! 2. 
Eigenvector orthogonality ++ tmp2(:,:) = z(:,:) ++ tmp1 = 0 ++#ifdef WITH_MPI ++#ifdef DOUBLE_PRECISION_REAL ++ call PZGEMM('C', 'N', int(nev,kind=BLAS_KIND), int(nev,kind=BLAS_KIND), int(na,kind=BLAS_KIND), & ++ CONE, z, 1_BLAS_KIND, 1_BLAS_KIND, & ++ sc_desc, tmp2, 1_BLAS_KIND, 1_BLAS_KIND, sc_desc, CZERO, tmp1, 1_BLAS_KIND, 1_BLAS_KIND, sc_desc) ++#else ++ call PCGEMM('C', 'N', int(nev,kind=BLAS_KIND), int(nev,kind=BLAS_KIND), int(na,kind=BLAS_KIND), & ++ CONE, z, 1_BLAS_KIND, 1_BLAS_KIND, & ++ sc_desc, tmp2, 1_BLAS_KIND, 1_BLAS_KIND, sc_desc, CZERO, tmp1, 1_BLAS_KIND, 1_BLAS_KIND, sc_desc) ++#endif ++ ++#else /* WITH_MPI */ ++#ifdef DOUBLE_PRECISION_REAL ++ call ZGEMM('C','N', int(nev,kind=BLAS_KIND) , int(nev,kind=BLAS_KIND), int(na,kind=BLAS_KIND),CONE, z, & ++ int(na,kind=BLAS_KIND), tmp2, int(na,kind=BLAS_KIND), CZERO, tmp1, int(na,kind=BLAS_KIND)) ++#else ++ call CGEMM('C','N', int(nev,kind=BLAS_KIND) , int(nev,kind=BLAS_KIND), int(na,kind=BLAS_KIND),CONE, z, & ++ int(na,kind=BLAS_KIND), tmp2, int(na,kind=BLAS_KIND), CZERO, tmp1, int(na,kind=BLAS_KIND)) ++#endif ++#endif /* WITH_MPI */ ++ ! First check, whether the elements on diagonal are 1 .. "normality" of the vectors ++ err = 0.0_rk ++ do i=1, nev ++ if (map_global_array_index_to_local_index(int(i,kind=c_int), int(i,kind=c_int), row_Local, col_Local, & ++ int(nblk,kind=c_int), int(np_rows,kind=c_int), int(np_cols,kind=c_int), & ++ int(my_prow,kind=c_int), int(my_pcol,kind=c_int)) ) then ++ rowLocal = int(row_Local,kind=INT_TYPE) ++ colLocal = int(col_Local,kind=INT_TYPE) ++ err = max(err, abs(tmp1(rowLocal,colLocal) - CONE)) ++ endif ++ end do ++#ifdef WITH_MPI ++ call mpi_allreduce(err, errmax, 1_MPI_KIND, MPI_REAL_PRECISION, MPI_MAX, int(MPI_COMM_WORLD,kind=MPI_KIND), mpierr) ++#else /* WITH_MPI */ ++ errmax = err ++#endif /* WITH_MPI */ ++ if (myid==0) print *,'%Maximal error in eigenvector lengths:',errmax ++ ++ ! Second, find the maximal error in the whole Z**T * Z matrix (its diference from identity matrix) ++ ! Initialize tmp2 to unit matrix ++ tmp2 = 0 ++#ifdef WITH_MPI ++#ifdef DOUBLE_PRECISION_REAL ++ call PZLASET('A', int(nev,kind=BLAS_KIND), int(nev,kind=BLAS_KIND), CZERO, CONE, tmp2, 1_BLAS_KIND, 1_BLAS_KIND, sc_desc) ++#else ++ call PCLASET('A', int(nev,kind=BLAS_KIND), int(nev,kind=BLAS_KIND), CZERO, CONE, tmp2, 1_BLAS_KIND, 1_BLAS_KIND, sc_desc) ++#endif ++#else /* WITH_MPI */ ++#ifdef DOUBLE_PRECISION_REAL ++ call ZLASET('A',int(nev,kind=BLAS_KIND) ,int(nev,kind=BLAS_KIND) ,CZERO, CONE, tmp2, int(na,kind=BLAS_KIND)) ++#else ++ call CLASET('A',int(nev,kind=BLAS_KIND) ,int(nev,kind=BLAS_KIND) ,CZERO, CONE, tmp2, int(na,kind=BLAS_KIND)) ++#endif ++#endif /* WITH_MPI */ ++ ++ ! ! tmp1 = Z**T * Z - Unit Matrix ++ tmp1(:,:) = tmp1(:,:) - tmp2(:,:) ++ ++ ! Get maximum error (max abs value in tmp1) ++ err = maxval(abs(tmp1)) ++#ifdef WITH_MPI ++ call mpi_allreduce(err, errmax, 1_MPI_KIND, MPI_REAL_PRECISION, MPI_MAX, int(MPI_COMM_WORLD,kind=MPI_KIND), mpierr) ++#else /* WITH_MPI */ ++ errmax = err ++#endif /* WITH_MPI */ ++ if (myid==0) print *,'%Error Orthogonality:',errmax ++ ++ if (nev .ge. 2) then ++ if (errmax .gt. tol_orth .or. errmax .eq. 0.0_rk) then ++ status = 1 ++ endif ++ else ++ if (errmax .gt. 
tol_orth) then ++ status = 1 ++ endif ++ endif ++ ++ deallocate(as_complex) ++ end function ++ ++#endif /* REALCASE */ ++ ++#if REALCASE == 1 ++#ifdef DOUBLE_PRECISION_REAL ++ !c> TEST_C_INT_TYPE check_correctness_evp_numeric_residuals_ss_real_double_f(TEST_C_INT_TYPE na, TEST_C_INT_TYPE nev, TEST_C_INT_TYPE na_rows, TEST_C_INT_TYPE na_cols, ++ !c> double *as, complex double *z, double *ev, TEST_C_INT_TYPE sc_desc[9], ++ !c> TEST_C_INT_TYPE nblk, TEST_C_INT_TYPE myid, TEST_C_INT_TYPE np_rows, TEST_C_INT_TYPE np_cols, TEST_C_INT_TYPE my_prow, TEST_C_INT_TYPE my_pcol); ++#else ++ !c> TEST_C_INT_TYPE check_correctness_evp_numeric_residuals_ss_real_single_f(TEST_C_INT_TYPE na, TEST_C_INT_TYPE nev, TEST_C_INT_TYPE na_rows, TEST_C_INT_TYPE na_cols, ++ !c> float *as, complex float *z, float *ev, TEST_C_INT_TYPE sc_desc[9], ++ !c> TEST_C_INT_TYPE nblk, TEST_C_INT_TYPE myid, TEST_C_INT_TYPE np_rows, TEST_C_INT_TYPE np_cols, TEST_C_INT_TYPE my_prow, TEST_C_INT_TYPE my_pcol); ++#endif ++#endif /* REALCASE */ ++ ++#if REALCASE == 1 ++function check_correctness_evp_numeric_residuals_ss_real_& ++&PRECISION& ++&_f (na, nev, na_rows, na_cols, as, z, ev, sc_desc, nblk, myid, np_rows, np_cols, my_prow, my_pcol) result(status) & ++ bind(C,name="check_correctness_evp_numeric_residuals_ss_& ++ &MATH_DATATYPE& ++ &_& ++ &PRECISION& ++ &_f") ++ ++ use precision_for_tests ++ use iso_c_binding ++ ++ implicit none ++#include "./test_precision_kinds.F90" ++ ++ TEST_INT_TYPE :: status ++ TEST_INT_TYPE, value :: na, nev, myid, na_rows, na_cols, nblk, np_rows, np_cols, my_prow, my_pcol ++ real(kind=rck) :: as(1:na_rows,1:na_cols) ++ complex(kind=rck) :: z(1:na_rows,1:na_cols) ++ real(kind=rck) :: ev(1:na) ++ TEST_INT_TYPE :: sc_desc(1:9) ++ ++ status = check_correctness_evp_numeric_residuals_ss_real_& ++ &PRECISION& ++ & (na, nev, as, z, ev, sc_desc, nblk, myid, np_rows, np_cols, my_prow, my_pcol) ++ end function ++#endif /* REALCASE */ ++ ++function check_correctness_evp_numeric_residuals_& ++ &MATH_DATATYPE& ++ &_& ++ &PRECISION& ++ & (na, nev, as, z, ev, sc_desc, nblk, myid, np_rows, np_cols, my_prow, my_pcol, bs) result(status) ++ ++ use tests_blas_interfaces ++ use tests_scalapack_interfaces ++ use precision_for_tests ++ implicit none ++#include "./test_precision_kinds.F90" ++ TEST_INT_TYPE :: status ++ TEST_INT_TYPE, intent(in) :: na, nev, nblk, myid, np_rows, np_cols, my_prow, my_pcol ++ MATH_DATATYPE(kind=rck), intent(in) :: as(:,:), z(:,:) ++ MATH_DATATYPE(kind=rck), intent(in), optional :: bs(:,:) ++ real(kind=rk) :: ev(:) ++ MATH_DATATYPE(kind=rck), dimension(size(as,dim=1),size(as,dim=2)) :: tmp1, tmp2 ++ MATH_DATATYPE(kind=rck) :: xc ++ ++ TEST_INT_TYPE :: sc_desc(:) ++ ++ TEST_INT_TYPE :: i, rowLocal, colLocal ++ integer(kind=c_int) :: row_Local, col_Local ++ real(kind=rck) :: err, errmax ++ ++ TEST_INT_MPI_TYPE :: mpierr ++ ++! tolerance for the residual test for different math type/precision setups ++ real(kind=rk), parameter :: tol_res_real_double = 5e-12_rk ++ real(kind=rk), parameter :: tol_res_real_single = 3e-2_rk ++ real(kind=rk), parameter :: tol_res_complex_double = 5e-12_rk ++ real(kind=rk), parameter :: tol_res_complex_single = 3e-2_rk ++ real(kind=rk) :: tol_res = tol_res_& ++ &MATH_DATATYPE& ++ &_& ++ &PRECISION ++ ! precision of generalized problem is lower ++ real(kind=rk), parameter :: generalized_penalty = 10.0_rk ++ ++ ! 
tolerance for the orthogonality test for different math type/precision setups ++ real(kind=rk), parameter :: tol_orth_real_double = 5e-11_rk ++ real(kind=rk), parameter :: tol_orth_real_single = 9e-2_rk ++ real(kind=rk), parameter :: tol_orth_complex_double = 5e-11_rk ++ real(kind=rk), parameter :: tol_orth_complex_single = 9e-3_rk ++ real(kind=rk), parameter :: tol_orth = tol_orth_& ++ &MATH_DATATYPE& ++ &_& ++ &PRECISION ++ ++ if (present(bs)) then ++ tol_res = generalized_penalty * tol_res ++ endif ++ status = 0 ++ ++ ! 1. Residual (maximum of || A*Zi - Zi*EVi ||) ++ ++! tmp1 = Zi*EVi ++ tmp1(:,:) = z(:,:) ++ do i=1,nev ++ xc = ev(i) ++#ifdef WITH_MPI ++ call p& ++ &BLAS_CHAR& ++ &scal(na, xc, tmp1, 1_BLAS_KIND, i, sc_desc, 1_BLAS_KIND) ++#else /* WITH_MPI */ ++ call BLAS_CHAR& ++ &scal(na, xc, tmp1(:,i), 1_BLAS_KIND) ++#endif /* WITH_MPI */ ++ enddo ++ ++ ! for generalized EV problem, multiply by bs as well ++ ! tmp2 = B * tmp1 ++ if(present(bs)) then ++#ifdef WITH_MPI ++ call scal_PRECISION_GEMM('N', 'N', na, nev, na, ONE, bs, 1_BLAS_KIND, 1_BLAS_KIND, sc_desc, & ++ tmp1, 1_BLAS_KIND, 1_BLAS_KIND, sc_desc, ZERO, tmp2, 1_BLAS_KIND, 1_BLAS_KIND, sc_desc) ++#else /* WITH_MPI */ ++ call PRECISION_GEMM('N','N',na,nev,na,ONE,bs,na,tmp1,na,ZERO,tmp2,na) ++#endif /* WITH_MPI */ ++ else ++ ! normal eigenvalue problem .. no need to multiply ++ tmp2(:,:) = tmp1(:,:) ++ end if ++ ++ ! tmp1 = A * Z ++ ! as is original stored matrix, Z are the EVs ++#ifdef WITH_MPI ++ call scal_PRECISION_GEMM('N', 'N', na, nev, na, ONE, as, 1_BLAS_KIND, 1_BLAS_KIND, sc_desc, & ++ z, 1_BLAS_KIND, 1_BLAS_KIND, sc_desc, ZERO, tmp1, 1_BLAS_KIND, 1_BLAS_KIND, sc_desc) ++#else /* WITH_MPI */ ++ call PRECISION_GEMM('N','N',na,nev,na,ONE,as,na,z,na,ZERO,tmp1,na) ++#endif /* WITH_MPI */ ++ ++ ! tmp1 = A*Zi - Zi*EVi ++ tmp1(:,:) = tmp1(:,:) - tmp2(:,:) ++ ++ ! Get maximum norm of columns of tmp1 ++ errmax = 0.0_rk ++ ++ do i=1,nev ++#if REALCASE == 1 ++ err = 0.0_rk ++#ifdef WITH_MPI ++ call scal_PRECISION_NRM2(na, err, tmp1, 1_BLAS_KIND, i, sc_desc, 1_BLAS_KIND) ++#else /* WITH_MPI */ ++ err = PRECISION_NRM2(na,tmp1(1,i),1_BLAS_KIND) ++#endif /* WITH_MPI */ ++ errmax = max(errmax, err) ++#endif /* REALCASE */ ++ ++#if COMPLEXCASE == 1 ++ xc = 0 ++#ifdef WITH_MPI ++ call scal_PRECISION_DOTC(na, xc, tmp1, 1_BLAS_KIND, i, sc_desc, & ++ 1_BLAS_KIND, tmp1, 1_BLAS_KIND, i, sc_desc, 1_BLAS_KIND) ++#else /* WITH_MPI */ ++ xc = PRECISION_DOTC(na,tmp1,1_BLAS_KIND,tmp1,1_BLAS_KIND) ++#endif /* WITH_MPI */ ++ errmax = max(errmax, sqrt(real(xc,kind=REAL_DATATYPE))) ++#endif /* COMPLEXCASE */ ++ enddo ++ ++ ! Get maximum error norm over all processors ++ err = errmax ++#ifdef WITH_MPI ++ call mpi_allreduce(err, errmax, 1_MPI_KIND, MPI_REAL_PRECISION, MPI_MAX, MPI_COMM_WORLD, mpierr) ++#else /* WITH_MPI */ ++ errmax = err ++#endif /* WITH_MPI */ ++ if (myid==0) print *,'Results of numerical residual checks:' ++ if (myid==0) print *,'Error Residual :',errmax ++ if (nev .ge. 2) then ++ if (errmax .gt. tol_res .or. errmax .eq. 0.0_rk) then ++ status = 1 ++ endif ++ else ++ if (errmax .gt. tol_res) then ++ status = 1 ++ endif ++ endif ++ ++ ! 2. Eigenvector orthogonality ++ if(present(bs)) then ++ !for the generalized EVP, the eigenvectors should be B-orthogonal, not orthogonal ++ ! 
tmp2 = B * Z ++ tmp2(:,:) = 0.0_rck ++#ifdef WITH_MPI ++ call scal_PRECISION_GEMM('N', 'N', na, nev, na, ONE, bs, 1_BLAS_KIND, 1_BLAS_KIND, & ++ sc_desc, z, 1_BLAS_KIND, 1_BLAS_KIND, sc_desc, ZERO, tmp2, 1_BLAS_KIND, 1_BLAS_KIND, sc_desc) ++#else /* WITH_MPI */ ++ call PRECISION_GEMM('N','N', na, nev, na, ONE, bs, na, z, na, ZERO, tmp2, na) ++#endif /* WITH_MPI */ ++ ++ else ++ tmp2(:,:) = z(:,:) ++ endif ++ ! tmp1 = Z**T * tmp2 ++ ! actually tmp1 = Z**T * Z for standard case and tmp1 = Z**T * B * Z for generalized ++ tmp1 = 0 ++#ifdef WITH_MPI ++ call scal_PRECISION_GEMM(BLAS_TRANS_OR_CONJ, 'N', nev, nev, na, ONE, z, 1_BLAS_KIND, 1_BLAS_KIND, & ++ sc_desc, tmp2, 1_BLAS_KIND, 1_BLAS_KIND, sc_desc, ZERO, & ++ tmp1, 1_BLAS_KIND, 1_BLAS_KIND, sc_desc) ++#else /* WITH_MPI */ ++ call PRECISION_GEMM(BLAS_TRANS_OR_CONJ,'N',nev,nev,na,ONE,z,na,tmp2,na,ZERO,tmp1,na) ++#endif /* WITH_MPI */ ++ ! First check, whether the elements on diagonal are 1 .. "normality" of the vectors ++ err = 0.0_rk ++ do i=1, nev ++ if (map_global_array_index_to_local_index(int(i,kind=c_int), int(i,kind=c_int) , row_Local, col_Local, & ++ int(nblk,kind=c_int), int(np_rows,kind=c_int), & ++ int(np_cols,kind=c_int), int(my_prow,kind=c_int), & ++ int(my_pcol,kind=c_int) )) then ++ rowLocal = int(row_Local,kind=INT_TYPE) ++ colLocal = int(col_Local,kind=INT_TYPE) ++ err = max(err, abs(tmp1(rowLocal,colLocal) - 1.0_rk)) ++ endif ++ end do ++#ifdef WITH_MPI ++ call mpi_allreduce(err, errmax, 1_MPI_KIND, MPI_REAL_PRECISION, MPI_MAX, MPI_COMM_WORLD, mpierr) ++#else /* WITH_MPI */ ++ errmax = err ++#endif /* WITH_MPI */ ++ if (myid==0) print *,'Maximal error in eigenvector lengths:',errmax ++ ++ ! Second, find the maximal error in the whole Z**T * Z matrix (its diference from identity matrix) ++ ! Initialize tmp2 to unit matrix ++ tmp2 = 0 ++#ifdef WITH_MPI ++ call scal_PRECISION_LASET('A', nev, nev, ZERO, ONE, tmp2, 1_BLAS_KIND, 1_BLAS_KIND, sc_desc) ++#else /* WITH_MPI */ ++ call PRECISION_LASET('A',nev,nev,ZERO,ONE,tmp2,na) ++#endif /* WITH_MPI */ ++ ++ ! ! tmp1 = Z**T * Z - Unit Matrix ++ tmp1(:,:) = tmp1(:,:) - tmp2(:,:) ++ ++ ! Get maximum error (max abs value in tmp1) ++ err = maxval(abs(tmp1)) ++#ifdef WITH_MPI ++ call mpi_allreduce(err, errmax, 1_MPI_KIND, MPI_REAL_PRECISION, MPI_MAX, MPI_COMM_WORLD, mpierr) ++#else /* WITH_MPI */ ++ errmax = err ++#endif /* WITH_MPI */ ++ if (myid==0) print *,'Error Orthogonality:',errmax ++ ++ if (nev .ge. 2) then ++ if (errmax .gt. tol_orth .or. errmax .eq. 0.0_rk) then ++ status = 1 ++ endif ++ else ++ if (errmax .gt. 
tol_orth) then ++ status = 1 ++ endif ++ endif ++ end function ++ ++#if REALCASE == 1 ++#ifdef DOUBLE_PRECISION_REAL ++ !c> TEST_C_INT_TYPE check_correctness_evp_numeric_residuals_real_double_f(TEST_C_INT_TYPE na, TEST_C_INT_TYPE nev, ++ !c> TEST_C_INT_TYPE na_rows, TEST_C_INT_TYPE na_cols, ++ !c> double *as, double *z, double *ev, ++ !c> TEST_C_INT_TYPE sc_desc[9], ++ !c> TEST_C_INT_TYPE nblk, TEST_C_INT_TYPE myid, ++ !c> TEST_C_INT_TYPE np_rows, ++ !c> TEST_C_INT_TYPE np_cols, ++ !c> TEST_C_INT_TYPE my_prow, TEST_C_INT_TYPE my_pcol); ++#else ++ !c> TEST_C_INT_TYPE check_correctness_evp_numeric_residuals_real_single_f(TEST_C_INT_TYPE na, TEST_C_INT_TYPE nev, ++ !c> TEST_C_INT_TYPE na_rows, TEST_C_INT_TYPE na_cols, ++ !c> float *as, float *z, float *ev, ++ !c> TEST_C_INT_TYPE sc_desc[9], ++ !c> TEST_C_INT_TYPE nblk, TEST_C_INT_TYPE myid, ++ !c> TEST_C_INT_TYPE np_rows, ++ !c> TEST_C_INT_TYPE np_cols, ++ !c> TEST_C_INT_TYPE my_prow, TEST_C_INT_TYPE my_pcol); ++#endif ++#endif /* REALCASE */ ++ ++#if COMPLEXCASE == 1 ++#ifdef DOUBLE_PRECISION_COMPLEX ++ !c> TEST_C_INT_TYPE check_correctness_evp_numeric_residuals_complex_double_f(TEST_C_INT_TYPE na, TEST_C_INT_TYPE nev, ++ !c> TEST_C_INT_TYPE na_rows, TEST_C_INT_TYPE na_cols, ++ !c> complex double *as, complex double *z, double *ev, ++ !c> TEST_C_INT_TYPE sc_desc[9], ++ !c> TEST_C_INT_TYPE nblk, TEST_C_INT_TYPE myid, ++ !c> TEST_C_INT_TYPE np_rows, TEST_C_INT_TYPE np_cols, ++ !c> TEST_C_INT_TYPE my_prow, TEST_C_INT_TYPE my_pcol); ++#else ++ !c> TEST_C_INT_TYPE check_correctness_evp_numeric_residuals_complex_single_f(TEST_C_INT_TYPE na, TEST_C_INT_TYPE nev, ++ !c> TEST_C_INT_TYPE na_rows, TEST_C_INT_TYPE na_cols, ++ !c> complex float *as, complex float *z, float *ev, ++ !c> TEST_C_INT_TYPE sc_desc[9], ++ !c> TEST_C_INT_TYPE nblk, TEST_C_INT_TYPE myid, ++ !c> TEST_C_INT_TYPE np_rows, TEST_C_INT_TYPE np_cols, ++ !c> TEST_C_INT_TYPE my_prow, TEST_C_INT_TYPE my_pcol); ++#endif ++#endif /* COMPLEXCASE */ ++ ++function check_correctness_evp_numeric_residuals_& ++&MATH_DATATYPE& ++&_& ++&PRECISION& ++&_f (na, nev, na_rows, na_cols, as, z, ev, sc_desc, nblk, myid, np_rows, np_cols, my_prow, my_pcol) result(status) & ++ bind(C,name="check_correctness_evp_numeric_residuals_& ++ &MATH_DATATYPE& ++ &_& ++ &PRECISION& ++ &_f") ++ ++ use precision_for_tests ++ use iso_c_binding ++ ++ implicit none ++#include "./test_precision_kinds.F90" ++ ++ TEST_INT_TYPE :: status ++ TEST_INT_TYPE, value :: na, nev, myid, na_rows, na_cols, nblk, np_rows, np_cols, my_prow, my_pcol ++ MATH_DATATYPE(kind=rck) :: as(1:na_rows,1:na_cols), z(1:na_rows,1:na_cols) ++ real(kind=rck) :: ev(1:na) ++ TEST_INT_TYPE :: sc_desc(1:9) ++ ++ status = check_correctness_evp_numeric_residuals_& ++ &MATH_DATATYPE& ++ &_& ++ &PRECISION& ++ & (na, nev, as, z, ev, sc_desc, nblk, myid, np_rows, np_cols, my_prow, my_pcol) ++ ++ end function ++ ++!---- variant for the generalized eigenproblem ++!---- unlike in Fortran, we cannot use optional parameter ++!---- we thus define a different function ++#if REALCASE == 1 ++#ifdef DOUBLE_PRECISION_REAL ++ !c> TEST_C_INT_TYPE check_correctness_evp_gen_numeric_residuals_real_double_f(TEST_C_INT_TYPE na, TEST_C_INT_TYPE nev, ++ !c> TEST_C_INT_TYPE na_rows, TEST_C_INT_TYPE na_cols, ++ !c> double *as, double *z, double *ev, ++ !c> TEST_C_INT_TYPE sc_desc[9], ++ !c> TEST_C_INT_TYPE nblk, TEST_C_INT_TYPE myid, ++ !c> TEST_C_INT_TYPE np_rows, TEST_C_INT_TYPE np_cols, ++ !c> TEST_C_INT_TYPE my_prow, TEST_C_INT_TYPE my_pcol, ++ !c> double *bs); ++#else ++ !c> 
TEST_C_INT_TYPE check_correctness_evp_gen_numeric_residuals_real_single_f(TEST_C_INT_TYPE na, TEST_C_INT_TYPE nev, ++ !c> TEST_C_INT_TYPE na_rows, TEST_C_INT_TYPE na_cols, ++ !c> float *as, float *z, float *ev, ++ !c> TEST_C_INT_TYPE sc_desc[9], ++ !c> TEST_C_INT_TYPE nblk, TEST_C_INT_TYPE myid, ++ !c> TEST_C_INT_TYPE np_rows, ++ !c> TEST_C_INT_TYPE np_cols, ++ !c> TEST_C_INT_TYPE my_prow, ++ !c> TEST_C_INT_TYPE my_pcol, ++ !c> float *bs); ++#endif ++#endif /* REALCASE */ ++ ++#if COMPLEXCASE == 1 ++#ifdef DOUBLE_PRECISION_COMPLEX ++ !c> TEST_C_INT_TYPE check_correctness_evp_gen_numeric_residuals_complex_double_f(TEST_C_INT_TYPE na, TEST_C_INT_TYPE nev, ++ !c> TEST_C_INT_TYPE na_rows, TEST_C_INT_TYPE na_cols, ++ !c> complex double *as, complex double *z, double *ev, ++ !c> TEST_C_INT_TYPE sc_desc[9], ++ !c> TEST_C_INT_TYPE nblk, TEST_C_INT_TYPE myid, ++ !c> TEST_C_INT_TYPE np_rows, TEST_C_INT_TYPE np_cols, ++ !c> TEST_C_INT_TYPE my_prow, TEST_C_INT_TYPE my_pcol, ++ !c> complex double *bs); ++#else ++ !c> TEST_C_INT_TYPE check_correctness_evp_gen_numeric_residuals_complex_single_f(TEST_C_INT_TYPE na, TEST_C_INT_TYPE nev, ++ !c> TEST_C_INT_TYPE na_rows, TEST_C_INT_TYPE na_cols, ++ !c> complex float *as, complex float *z, float *ev, ++ !c> TEST_C_INT_TYPE sc_desc[9], ++ !c> TEST_C_INT_TYPE nblk, TEST_C_INT_TYPE myid, ++ !c> TEST_C_INT_TYPE np_rows, TEST_C_INT_TYPE np_cols, ++ !c> TEST_C_INT_TYPE my_prow, TEST_C_INT_TYPE my_pcol, ++ !c> complex float *bs); ++#endif ++#endif /* COMPLEXCASE */ ++ ++function check_correctness_evp_gen_numeric_residuals_& ++&MATH_DATATYPE& ++&_& ++&PRECISION& ++&_f (na, nev, na_rows, na_cols, as, z, ev, sc_desc, nblk, myid, np_rows, np_cols, my_prow, my_pcol, bs) result(status) & ++ bind(C,name="check_correctness_evp_gen_numeric_residuals_& ++ &MATH_DATATYPE& ++ &_& ++ &PRECISION& ++ &_f") ++ ++ use iso_c_binding ++ use precision_for_tests ++ implicit none ++#include "./test_precision_kinds.F90" ++ ++ TEST_INT_TYPE :: status ++ TEST_INT_TYPE, value :: na, nev, myid, na_rows, na_cols, nblk, np_rows, np_cols, my_prow, my_pcol ++ MATH_DATATYPE(kind=rck) :: as(1:na_rows,1:na_cols), z(1:na_rows,1:na_cols), bs(1:na_rows,1:na_cols) ++ real(kind=rck) :: ev(1:na) ++ TEST_INT_TYPE :: sc_desc(1:9) ++ ++ status = check_correctness_evp_numeric_residuals_& ++ &MATH_DATATYPE& ++ &_& ++ &PRECISION& ++ & (na, nev, as, z, ev, sc_desc, nblk, myid, np_rows, np_cols, my_prow, my_pcol, bs) ++ ++ end function ++ ++ !----------------------------------------------------------------------------------------------------------- ++ ++ function check_correctness_eigenvalues_toeplitz_& ++ &MATH_DATATYPE& ++ &_& ++ &PRECISION& ++ & (na, diagonalElement, subdiagonalElement, ev, z, myid) result(status) ++ use iso_c_binding ++ use precision_for_tests ++ implicit none ++#include "./test_precision_kinds.F90" ++ ++ TEST_INT_TYPE :: status, ii, j, myid ++ TEST_INT_TYPE, intent(in) :: na ++ real(kind=rck) :: diagonalElement, subdiagonalElement ++ real(kind=rck) :: ev_analytic(na), ev(na) ++ MATH_DATATYPE(kind=rck) :: z(:,:) ++ ++#if defined(DOUBLE_PRECISION_REAL) || defined(DOUBLE_PRECISION_COMPLEX) ++ real(kind=rck), parameter :: pi = 3.141592653589793238462643383279_c_double ++#else ++ real(kind=rck), parameter :: pi = 3.1415926535897932_c_float ++#endif ++ real(kind=rck) :: tmp, maxerr ++ TEST_INT_TYPE :: loctmp ++ status = 0 ++ ++ ! 
analytic solution
++    do ii=1, na
++      ev_analytic(ii) = diagonalElement + 2.0_rk * &
++                        subdiagonalElement * cos( pi*real(ii,kind=rk)/ &
++                        real(na+1,kind=rk) )
++    enddo
++
++    ! sort analytic solution:
++
++    ! this hack is neither elegant nor optimized: for huge matrices it might be expensive
++    ! a proper sorting algorithm could be implemented here
++
++    tmp    = minval(ev_analytic)
++    loctmp = minloc(ev_analytic, 1)
++
++    ev_analytic(loctmp) = ev_analytic(1)
++    ev_analytic(1) = tmp
++    do ii=2, na
++      tmp = ev_analytic(ii)
++      do j= ii, na
++        if (ev_analytic(j) .lt. tmp) then
++          tmp = ev_analytic(j)
++          loctmp = j
++        endif
++      enddo
++      ev_analytic(loctmp) = ev_analytic(ii)
++      ev_analytic(ii) = tmp
++    enddo
++
++    ! compute a simple error max of eigenvalues
++    maxerr = 0.0
++    maxerr = maxval( (ev(:) - ev_analytic(:))/ev_analytic(:) , 1)
++
++#if defined(DOUBLE_PRECISION_REAL) || defined(DOUBLE_PRECISION_COMPLEX)
++    if (abs(maxerr) .gt. 8.e-13_c_double) then
++#else
++    if (abs(maxerr) .gt. 8.e-4_c_float) then
++#endif
++      status = 1
++      if (myid .eq. 0) then
++        print *,"Result of Toeplitz matrix test: "
++        print *,"Eigenvalues differ from analytic solution: maxerr = ",abs(maxerr)
++      endif
++    endif
++
++    if (status .eq. 0) then
++      if (myid .eq. 0) then
++        print *,"Result of Toeplitz matrix test: test passed"
++        print *,"Eigenvalues differ from analytic solution: maxerr = ",abs(maxerr)
++      endif
++    endif
++  end function
++
++  function check_correctness_cholesky_&
++  &MATH_DATATYPE&
++  &_&
++  &PRECISION&
++  & (na, a, as, na_rows, sc_desc, myid) result(status)
++    use precision_for_tests
++    use tests_blas_interfaces
++    use tests_scalapack_interfaces
++    implicit none
++#include "./test_precision_kinds.F90"
++    TEST_INT_TYPE               :: status
++    TEST_INT_TYPE, intent(in)   :: na, myid, na_rows
++
++    MATH_DATATYPE(kind=rck), intent(in)   :: a(:,:), as(:,:)
++    MATH_DATATYPE(kind=rck), dimension(size(as,dim=1),size(as,dim=2)) :: tmp1, tmp2
++#if COMPLEXCASE == 1
++    ! needed for [z,c]lange from scalapack
++    real(kind=rk), dimension(2*size(as,dim=1),size(as,dim=2)) :: tmp1_real
++#endif
++    real(kind=rk)               :: norm, normmax
++
++    TEST_INT_TYPE               :: sc_desc(:)
++    real(kind=rck)              :: err, errmax
++    TEST_INT_MPI_TYPE           :: mpierr
++
++    status = 0
++    tmp1(:,:) = 0.0_rck
++
++
++#if REALCASE == 1
++    ! tmp1 = a**T
++#ifdef WITH_MPI
++    call p&
++         &BLAS_CHAR&
++         &tran(na, na, 1.0_rck, a, 1_BLAS_KIND, 1_BLAS_KIND, sc_desc, &
++               0.0_rck, tmp1, 1_BLAS_KIND, 1_BLAS_KIND, sc_desc)
++#else /* WITH_MPI */
++    tmp1 = transpose(a)
++#endif /* WITH_MPI */
++#endif /* REALCASE == 1 */
++
++#if COMPLEXCASE == 1
++    ! tmp1 = a**H
++#ifdef WITH_MPI
++    call p&
++         &BLAS_CHAR&
++         &tranc(na, na, ONE, a, 1_BLAS_KIND, 1_BLAS_KIND, sc_desc, &
++                ZERO, tmp1, 1_BLAS_KIND, 1_BLAS_KIND, sc_desc)
++#else /* WITH_MPI */
++    tmp1 = transpose(conjg(a))
++#endif /* WITH_MPI */
++#endif /* COMPLEXCASE == 1 */
++
++    ! tmp2 = a**T * a
++#ifdef WITH_MPI
++    call p&
++         &BLAS_CHAR&
++         &gemm("N","N", na, na, na, ONE, tmp1, 1_BLAS_KIND, 1_BLAS_KIND, sc_desc, &
++               a, 1_BLAS_KIND, 1_BLAS_KIND, &
++               sc_desc, ZERO, tmp2, 1_BLAS_KIND, 1_BLAS_KIND, sc_desc)
++#else /* WITH_MPI */
++    call BLAS_CHAR&
++         &gemm("N","N", na, na, na, ONE, tmp1, na, a, na, ZERO, tmp2, na)
++#endif /* WITH_MPI */
++
++    ! 
compare tmp2 with original matrix ++ tmp2(:,:) = tmp2(:,:) - as(:,:) ++ ++#ifdef WITH_MPI ++ norm = p& ++ &BLAS_CHAR& ++ &lange("M",na, na, tmp2, 1_BLAS_KIND, 1_BLAS_KIND, sc_desc, & ++#if COMPLEXCASE == 1 ++ tmp1_real) ++#else ++ tmp1) ++#endif ++#else /* WITH_MPI */ ++ norm = BLAS_CHAR& ++ &lange("M", na, na, tmp2, na_rows, & ++#if COMPLEXCASE == 1 ++ tmp1_real) ++#else ++ tmp1) ++#endif ++#endif /* WITH_MPI */ ++ ++ ++#ifdef WITH_MPI ++ call mpi_allreduce(norm, normmax, 1_MPI_KIND, MPI_REAL_PRECISION, MPI_MAX, MPI_COMM_WORLD, mpierr) ++#else /* WITH_MPI */ ++ normmax = norm ++#endif /* WITH_MPI */ ++ ++ if (myid .eq. 0) then ++ print *," Maximum error of result: ", normmax ++ endif ++ ++#if REALCASE == 1 ++#ifdef DOUBLE_PRECISION_REAL ++! if (normmax .gt. 5e-12_rk8 .or. normmax .eq. 0.0_rk8) then ++ if (normmax .gt. 5e-12_rk8) then ++ status = 1 ++ endif ++#else ++! if (normmax .gt. 5e-4_rk4 .or. normmax .eq. 0.0_rk4) then ++ if (normmax .gt. 5e-4_rk4 ) then ++ status = 1 ++ endif ++#endif ++#endif ++ ++#if COMPLEXCASE == 1 ++#ifdef DOUBLE_PRECISION_COMPLEX ++! if (normmax .gt. 5e-11_rk8 .or. normmax .eq. 0.0_rk8) then ++ if (normmax .gt. 5e-11_rk8 ) then ++ status = 1 ++ endif ++#else ++! if (normmax .gt. 5e-3_rk4 .or. normmax .eq. 0.0_rk4) then ++ if (normmax .gt. 5e-3_rk4) then ++ status = 1 ++ endif ++#endif ++#endif ++ end function ++ ++ function check_correctness_hermitian_multiply_& ++ &MATH_DATATYPE& ++ &_& ++ &PRECISION& ++ & (na, a, b, c, na_rows, sc_desc, myid) result(status) ++ use precision_for_tests ++ use tests_blas_interfaces ++ use tests_scalapack_interfaces ++ implicit none ++#include "./test_precision_kinds.F90" ++ TEST_INT_TYPE :: status ++ TEST_INT_TYPE, intent(in) :: na, myid, na_rows ++ MATH_DATATYPE(kind=rck), intent(in) :: a(:,:), b(:,:), c(:,:) ++ MATH_DATATYPE(kind=rck), dimension(size(a,dim=1),size(a,dim=2)) :: tmp1, tmp2 ++#if COMPLEXCASE == 1 ++ real(kind=rk), dimension(2*size(a,dim=1),size(a,dim=2)) :: tmp1_real ++#endif ++ real(kind=rck) :: norm, normmax ++ ++ ++ TEST_INT_TYPE :: sc_desc(:) ++ real(kind=rck) :: err, errmax ++ TEST_INT_MPI_TYPE :: mpierr ++ ++ status = 0 ++ tmp1(:,:) = ZERO ++ ++#if REALCASE == 1 ++ ! tmp1 = a**T ++#ifdef WITH_MPI ++ call p& ++ &BLAS_CHAR& ++ &tran(na, na, ONE, a, 1_BLAS_KIND, 1_BLAS_KIND, sc_desc, ZERO, tmp1, 1_BLAS_KIND, 1_BLAS_KIND, sc_desc) ++#else /* WITH_MPI */ ++ tmp1 = transpose(a) ++#endif /* WITH_MPI */ ++ ++#endif /* REALCASE == 1 */ ++ ++#if COMPLEXCASE == 1 ++ ! tmp1 = a**H ++#ifdef WITH_MPI ++ call p& ++ &BLAS_CHAR& ++ &tranc(na, na, ONE, a, 1_BLAS_KIND, 1_BLAS_KIND, sc_desc, ZERO, tmp1, 1_BLAS_KIND, 1_BLAS_KIND, sc_desc) ++#else /* WITH_MPI */ ++ tmp1 = transpose(conjg(a)) ++#endif /* WITH_MPI */ ++#endif /* COMPLEXCASE == 1 */ ++ ++ ! tmp2 = tmp1 * b ++#ifdef WITH_MPI ++ call p& ++ &BLAS_CHAR& ++ &gemm("N","N", na, na, na, ONE, tmp1, 1_BLAS_KIND, 1_BLAS_KIND, sc_desc, b, 1_BLAS_KIND, 1_BLAS_KIND, & ++ sc_desc, ZERO, tmp2, 1_BLAS_KIND, 1_BLAS_KIND, sc_desc) ++#else ++ call BLAS_CHAR& ++ &gemm("N","N", na, na, na, ONE, tmp1, na, b, na, ZERO, tmp2, na) ++#endif ++ ++ ! compare tmp2 with c ++ tmp2(:,:) = tmp2(:,:) - c(:,:) ++ ++#ifdef WITH_MPI ++ ! dirty hack: the last argument should be a real array, but is not referenced ++ ! if mode = "M", thus we get away with a complex argument ++ norm = p& ++ &BLAS_CHAR& ++ &lange("M", na, na, tmp2, 1_BLAS_KIND, 1_BLAS_KIND, sc_desc, & ++#if COMPLEXCASE == 1 ++ tmp1_real) ++#else ++ tmp1) ++#endif ++#else /* WITH_MPI */ ++ ! 
dirty hack: the last argument should be a real array, but is not referenced ++ ! if mode = "M", thus we get away with a complex argument ++ norm = BLAS_CHAR& ++ &lange("M", na, na, tmp2, na_rows, & ++#if COMPLEXCASE == 1 ++ tmp1_real) ++#else ++ tmp1) ++#endif ++#endif /* WITH_MPI */ ++ ++#ifdef WITH_MPI ++ call mpi_allreduce(norm, normmax, 1_MPI_KIND, MPI_REAL_PRECISION, MPI_MAX, MPI_COMM_WORLD, mpierr) ++#else /* WITH_MPI */ ++ normmax = norm ++#endif /* WITH_MPI */ ++ ++ if (myid .eq. 0) then ++ print *," Maximum error of result: ", normmax ++ endif ++ ++#ifdef DOUBLE_PRECISION_REAL ++ if (normmax .gt. 5e-11_rk8 ) then ++ status = 1 ++ endif ++#else ++ if (normmax .gt. 5e-3_rk4 ) then ++ status = 1 ++ endif ++#endif ++ ++#ifdef DOUBLE_PRECISION_COMPLEX ++ if (normmax .gt. 5e-11_rk8 ) then ++ status = 1 ++ endif ++#else ++ if (normmax .gt. 5e-3_rk4 ) then ++ status = 1 ++ endif ++#endif ++ end function ++ ++ function check_correctness_eigenvalues_frank_& ++ &MATH_DATATYPE& ++ &_& ++ &PRECISION& ++ & (na, ev, z, myid) result(status) ++ use iso_c_binding ++ use precision_for_tests ++ implicit none ++#include "./test_precision_kinds.F90" ++ ++ TEST_INT_TYPE :: status, i, j, myid ++ TEST_INT_TYPE, intent(in) :: na ++ real(kind=rck) :: ev_analytic(na), ev(na) ++ MATH_DATATYPE(kind=rck) :: z(:,:) ++ ++#if defined(DOUBLE_PRECISION_REAL) || defined(DOUBLE_PRECISION_COMPLEX) ++ real(kind=rck), parameter :: pi = 3.141592653589793238462643383279_c_double ++#else ++ real(kind=rck), parameter :: pi = 3.1415926535897932_c_float ++#endif ++ real(kind=rck) :: tmp, maxerr ++ TEST_INT_TYPE :: loctmp ++ status = 0 ++ ++ ! analytic solution ++ do i = 1, na ++ j = na - i ++#if defined(DOUBLE_PRECISION_REAL) || defined(DOUBLE_PRECISION_COMPLEX) ++ ev_analytic(i) = pi * (2.0_c_double * real(j,kind=c_double) + 1.0_c_double) / & ++ (2.0_c_double * real(na,kind=c_double) + 1.0_c_double) ++ ev_analytic(i) = 0.5_c_double / (1.0_c_double - cos(ev_analytic(i))) ++#else ++ ev_analytic(i) = pi * (2.0_c_float * real(j,kind=c_float) + 1.0_c_float) / & ++ (2.0_c_float * real(na,kind=c_float) + 1.0_c_float) ++ ev_analytic(i) = 0.5_c_float / (1.0_c_float - cos(ev_analytic(i))) ++#endif ++ enddo ++ ++ ! sort analytic solution: ++ ++ ! this hack is neither elegant, nor optimized: for huge matrixes it might be expensive ++ ! a proper sorting algorithmus might be implemented here ++ ++ tmp = minval(ev_analytic) ++ loctmp = minloc(ev_analytic, 1) ++ ++ ev_analytic(loctmp) = ev_analytic(1) ++ ev_analytic(1) = tmp ++ do i=2, na ++ tmp = ev_analytic(i) ++ do j= i, na ++ if (ev_analytic(j) .lt. tmp) then ++ tmp = ev_analytic(j) ++ loctmp = j ++ endif ++ enddo ++ ev_analytic(loctmp) = ev_analytic(i) ++ ev_analytic(i) = tmp ++ enddo ++ ++ ! compute a simple error max of eigenvalues ++ maxerr = 0.0 ++ maxerr = maxval( (ev(:) - ev_analytic(:))/ev_analytic(:) , 1) ++ ++#if defined(DOUBLE_PRECISION_REAL) || defined(DOUBLE_PRECISION_COMPLEX) ++ if (maxerr .gt. 8.e-13_c_double) then ++#else ++ if (maxerr .gt. 8.e-4_c_float) then ++#endif ++ status = 1 ++ if (myid .eq. 0) then ++ print *,"Result of Frank matrix test: " ++ print *,"Eigenvalues differ from analytic solution: maxerr = ",maxerr ++ endif ++ endif ++ end function ++ ++! 
vim: syntax=fortran +diff -ruN elpa-new_release_2021.11.001/examples/shared/test_output_type.F90 elpa-new_release_2021.11.001_ok/examples/shared/test_output_type.F90 +--- elpa-new_release_2021.11.001/examples/shared/test_output_type.F90 1970-01-01 01:00:00.000000000 +0100 ++++ elpa-new_release_2021.11.001_ok/examples/shared/test_output_type.F90 2022-01-26 10:10:58.372166000 +0100 +@@ -0,0 +1,11 @@ ++#include "config-f90.h" ++ ++module test_output_type ++ ++ type :: output_t ++ logical :: eigenvectors ++ logical :: eigenvalues ++ end type ++ ++ ++end module +diff -ruN elpa-new_release_2021.11.001/examples/shared/test_precision_kinds.F90 elpa-new_release_2021.11.001_ok/examples/shared/test_precision_kinds.F90 +--- elpa-new_release_2021.11.001/examples/shared/test_precision_kinds.F90 1970-01-01 01:00:00.000000000 +0100 ++++ elpa-new_release_2021.11.001_ok/examples/shared/test_precision_kinds.F90 2022-01-26 10:10:58.373135000 +0100 +@@ -0,0 +1,25 @@ ++#ifdef REALCASE ++#ifdef DOUBLE_PRECISION ++ integer, parameter :: rk = C_DOUBLE ++ integer, parameter :: rck = C_DOUBLE ++#endif ++#ifdef SINGLE_PRECISION ++ integer, parameter :: rk = C_FLOAT ++ integer, parameter :: rck = C_FLOAT ++#endif ++ real(kind=rck), parameter :: ZERO=0.0_rk, ONE = 1.0_rk ++#endif ++ ++#ifdef COMPLEXCASE ++#ifdef DOUBLE_PRECISION ++ integer, parameter :: rk = C_DOUBLE ++ integer, parameter :: ck = C_DOUBLE_COMPLEX ++ integer, parameter :: rck = C_DOUBLE_COMPLEX ++#endif ++#ifdef SINGLE_PRECISION ++ integer, parameter :: rk = C_FLOAT ++ integer, parameter :: ck = C_FLOAT_COMPLEX ++ integer, parameter :: rck = C_FLOAT_COMPLEX ++#endif ++ complex(kind=rck), parameter :: ZERO = (0.0_rk,0.0_rk), ONE = (1.0_rk,0.0_rk) ++#endif +diff -ruN elpa-new_release_2021.11.001/examples/shared/test_prepare_matrix.F90 elpa-new_release_2021.11.001_ok/examples/shared/test_prepare_matrix.F90 +--- elpa-new_release_2021.11.001/examples/shared/test_prepare_matrix.F90 1970-01-01 01:00:00.000000000 +0100 ++++ elpa-new_release_2021.11.001_ok/examples/shared/test_prepare_matrix.F90 2022-01-26 10:10:58.374062000 +0100 +@@ -0,0 +1,145 @@ ++! This file is part of ELPA. ++! ++! The ELPA library was originally created by the ELPA consortium, ++! consisting of the following organizations: ++! ++! - Max Planck Computing and Data Facility (MPCDF), formerly known as ++! Rechenzentrum Garching der Max-Planck-Gesellschaft (RZG), ++! - Bergische Universität Wuppertal, Lehrstuhl für angewandte ++! Informatik, ++! - Technische Universität München, Lehrstuhl für Informatik mit ++! Schwerpunkt Wissenschaftliches Rechnen , ++! - Fritz-Haber-Institut, Berlin, Abt. Theorie, ++! - Max-Plack-Institut für Mathematik in den Naturwissenschaften, ++! Leipzig, Abt. Komplexe Strukutren in Biologie und Kognition, ++! and ++! - IBM Deutschland GmbH ++! ++! ++! More information can be found here: ++! http://elpa.mpcdf.mpg.de/ ++! ++! ELPA is free software: you can redistribute it and/or modify ++! it under the terms of the version 3 of the license of the ++! GNU Lesser General Public License as published by the Free ++! Software Foundation. ++! ++! ELPA is distributed in the hope that it will be useful, ++! but WITHOUT ANY WARRANTY; without even the implied warranty of ++! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++! GNU Lesser General Public License for more details. ++! ++! You should have received a copy of the GNU Lesser General Public License ++! along with ELPA. If not, see <http://www.gnu.org/licenses/> ++! ++! 
ELPA reflects a substantial effort on the part of the original ++! ELPA consortium, and we ask you to respect the spirit of the ++! license that we chose: i.e., please contribute any changes you ++! may have back to the original ELPA library distribution, and keep ++! any derivatives of ELPA under the same license that we chose for ++! the original distribution, the GNU Lesser General Public License. ++! ++! Author: A. Marek, MPCDF ++#include "config-f90.h" ++ ++module test_prepare_matrix ++ ++ use precision_for_tests ++ interface prepare_matrix_random ++ module procedure prepare_matrix_random_complex_double ++ module procedure prepare_matrix_random_real_double ++#ifdef WANT_SINGLE_PRECISION_REAL ++ module procedure prepare_matrix_random_real_single ++#endif ++#ifdef WANT_SINGLE_PRECISION_COMPLEX ++ module procedure prepare_matrix_random_complex_single ++#endif ++ end interface ++ ++ ++ interface prepare_matrix_random_spd ++ module procedure prepare_matrix_random_spd_complex_double ++ module procedure prepare_matrix_random_spd_real_double ++#ifdef WANT_SINGLE_PRECISION_REAL ++ module procedure prepare_matrix_random_spd_real_single ++#endif ++#ifdef WANT_SINGLE_PRECISION_COMPLEX ++ module procedure prepare_matrix_random_spd_complex_single ++#endif ++ end interface ++ ++ ++ interface prepare_matrix_toeplitz ++ module procedure prepare_matrix_toeplitz_complex_double ++ module procedure prepare_matrix_toeplitz_real_double ++ module procedure prepare_matrix_toeplitz_mixed_complex_complex_double ++#ifdef WANT_SINGLE_PRECISION_REAL ++ module procedure prepare_matrix_toeplitz_real_single ++#endif ++#ifdef WANT_SINGLE_PRECISION_COMPLEX ++ module procedure prepare_matrix_toeplitz_complex_single ++ module procedure prepare_matrix_toeplitz_mixed_complex_complex_single ++#endif ++ end interface ++ ++ interface prepare_matrix_frank ++ module procedure prepare_matrix_frank_complex_double ++ module procedure prepare_matrix_frank_real_double ++#ifdef WANT_SINGLE_PRECISION_REAL ++ module procedure prepare_matrix_frank_real_single ++#endif ++#ifdef WANT_SINGLE_PRECISION_COMPLEX ++ module procedure prepare_matrix_frank_complex_single ++#endif ++ end interface ++ ++ ++ ++ private prows, pcols, map_global_array_index_to_local_index ++ ++ contains ++ ++#include "../../src/general/prow_pcol.F90" ++#include "../../src/general/map_global_to_local.F90" ++ ++#define COMPLEXCASE 1 ++#define DOUBLE_PRECISION 1 ++#include "../../src/general/precision_macros.h" ++#include "test_prepare_matrix_template.F90" ++#undef DOUBLE_PRECISION ++#undef COMPLEXCASE ++ ++ ++#ifdef WANT_SINGLE_PRECISION_COMPLEX ++ ++ ++#define COMPLEXCASE 1 ++#define SINGLE_PRECISION 1 ++#include "../../src/general/precision_macros.h" ++#include "test_prepare_matrix_template.F90" ++#undef SINGLE_PRECISION ++#undef COMPLEXCASE ++#endif /* WANT_SINGLE_PRECISION_COMPLEX */ ++ ++ ++#define REALCASE 1 ++#define DOUBLE_PRECISION 1 ++#include "../../src/general/precision_macros.h" ++#include "test_prepare_matrix_template.F90" ++#undef DOUBLE_PRECISION ++#undef REALCASE ++ ++#ifdef WANT_SINGLE_PRECISION_REAL ++ ++ ++#define REALCASE 1 ++#define SINGLE_PRECISION 1 ++#include "../../src/general/precision_macros.h" ++#include "test_prepare_matrix_template.F90" ++#undef SINGLE_PRECISION ++#undef REALCASE ++ ++#endif /* WANT_SINGLE_PRECISION_REAL */ ++ ++ ++end module +diff -ruN elpa-new_release_2021.11.001/examples/shared/test_prepare_matrix_template.F90 elpa-new_release_2021.11.001_ok/examples/shared/test_prepare_matrix_template.F90 +--- 
elpa-new_release_2021.11.001/examples/shared/test_prepare_matrix_template.F90 1970-01-01 01:00:00.000000000 +0100 ++++ elpa-new_release_2021.11.001_ok/examples/shared/test_prepare_matrix_template.F90 2022-01-26 10:10:58.375330000 +0100 +@@ -0,0 +1,510 @@ ++! This file is part of ELPA. ++! ++! The ELPA library was originally created by the ELPA consortium, ++! consisting of the following organizations: ++! ++! - Max Planck Computing and Data Facility (MPCDF), formerly known as ++! Rechenzentrum Garching der Max-Planck-Gesellschaft (RZG), ++! - Bergische Universität Wuppertal, Lehrstuhl für angewandte ++! Informatik, ++! - Technische Universität München, Lehrstuhl für Informatik mit ++! Schwerpunkt Wissenschaftliches Rechnen , ++! - Fritz-Haber-Institut, Berlin, Abt. Theorie, ++! - Max-Plack-Institut für Mathematik in den Naturwissenschaften, ++! Leipzig, Abt. Komplexe Strukutren in Biologie und Kognition, ++! and ++! - IBM Deutschland GmbH ++! ++! ++! More information can be found here: ++! http://elpa.mpcdf.mpg.de/ ++! ++! ELPA is free software: you can redistribute it and/or modify ++! it under the terms of the version 3 of the license of the ++! GNU Lesser General Public License as published by the Free ++! Software Foundation. ++! ++! ELPA is distributed in the hope that it will be useful, ++! but WITHOUT ANY WARRANTY; without even the implied warranty of ++! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++! GNU Lesser General Public License for more details. ++! ++! You should have received a copy of the GNU Lesser General Public License ++! along with ELPA. If not, see <http://www.gnu.org/licenses/> ++! ++! ELPA reflects a substantial effort on the part of the original ++! ELPA consortium, and we ask you to respect the spirit of the ++! license that we chose: i.e., please contribute any changes you ++! may have back to the original ELPA library distribution, and keep ++! any derivatives of ELPA under the same license that we chose for ++! the original distribution, the GNU Lesser General Public License. ++! ++! Author: A. 
Marek, MPCDF ++ ++#include "config-f90.h" ++ ++#ifdef HAVE_64BIT_INTEGER_MATH_SUPPORT ++#define TEST_INT_TYPE integer(kind=c_int64_t) ++#define INT_TYPE c_int64_t ++#define TEST_C_INT_TYPE_PTR long int* ++#define TEST_C_INT_TYPE long int ++#else ++#define TEST_INT_TYPE integer(kind=c_int32_t) ++#define INT_TYPE c_int32_t ++#define TEST_C_INT_TYPE_PTR int* ++#define TEST_C_INT_TYPE int ++#endif ++#ifdef HAVE_64BIT_INTEGER_MPI_SUPPORT ++#define TEST_INT_MPI_TYPE integer(kind=c_int64_t) ++#define INT_MPI_TYPE c_int64_t ++#define TEST_C_INT_MPI_TYPE_PTR long int* ++#define TEST_C_INT_MPI_TYPE long int ++#else ++#define TEST_INT_MPI_TYPE integer(kind=c_int32_t) ++#define INT_MPI_TYPE c_int32_t ++#define TEST_C_INT_MPI_TYPE_PTR int* ++#define TEST_C_INT_MPI_TYPE int ++#endif ++ ++ ++ subroutine prepare_matrix_random_& ++ &MATH_DATATYPE& ++ &_& ++ &PRECISION& ++ & (na, myid, sc_desc, a, z, as, is_skewsymmetric) ++ ++ ++ !use test_util ++ use tests_scalapack_interfaces ++ ++ implicit none ++#include "./test_precision_kinds.F90" ++ TEST_INT_TYPE, intent(in) :: myid, na, sc_desc(:) ++ MATH_DATATYPE(kind=rck), intent(inout) :: z(:,:), a(:,:), as(:,:) ++ ++#if COMPLEXCASE == 1 ++ real(kind=rk) :: xr(size(a,dim=1), size(a,dim=2)) ++#endif /* COMPLEXCASE */ ++ ++ integer(kind=c_int), allocatable :: iseed(:) ++ integer(kind=c_int) :: n ++ integer(kind=c_int), intent(in), optional :: is_skewsymmetric ++ logical :: skewsymmetric ++ ++ if (present(is_skewsymmetric)) then ++ if (is_skewsymmetric .eq. 1) then ++ skewsymmetric = .true. ++ else ++ skewsymmetric = .false. ++ endif ++ else ++ skewsymmetric = .false. ++ endif ++ ++ ! for getting a hermitian test matrix A we get a random matrix Z ++ ! and calculate A = Z + Z**H ++ ! in case of a skewsymmetric matrix A = Z - Z**H ++ ++ ! we want different random numbers on every process ++ ! (otherwise A might get rank deficient): ++ ++ call random_seed(size=n) ++ allocate(iseed(n)) ++ iseed(:) = myid ++ call random_seed(put=iseed) ++#if REALCASE == 1 ++ call random_number(z) ++ ++ a(:,:) = z(:,:) ++#endif /* REALCASE */ ++ ++#if COMPLEXCASE == 1 ++ call random_number(xr) ++ ++ z(:,:) = xr(:,:) ++ call RANDOM_NUMBER(xr) ++ z(:,:) = z(:,:) + (0.0_rk,1.0_rk)*xr(:,:) ++ a(:,:) = z(:,:) ++#endif /* COMPLEXCASE */ ++ ++ if (myid == 0) then ++ print '(a)','| Random matrix block has been set up. (only processor 0 confirms this step)' ++ endif ++ ++#if REALCASE == 1 ++#ifdef WITH_MPI ++ if (skewsymmetric) then ++ call p& ++ &BLAS_CHAR& ++ &tran(int(na,kind=BLAS_KIND), int(na,kind=BLAS_KIND), -ONE, z, 1_BLAS_KIND, 1_BLAS_KIND, sc_desc, & ++ ONE, a, 1_BLAS_KIND, 1_BLAS_KIND, sc_desc) ! A = A + Z**T ++ else ++ call p& ++ &BLAS_CHAR& ++ &tran(int(na,kind=BLAS_KIND), int(na,kind=BLAS_KIND), ONE, z, 1_BLAS_KIND, 1_BLAS_KIND, sc_desc, & ++ ONE, a, 1_BLAS_KIND, 1_BLAS_KIND, sc_desc) ! A = A + Z**T ++ endif ++#else /* WITH_MPI */ ++ if (skewsymmetric) then ++ a = a - transpose(z) ++ else ++ a = a + transpose(z) ++ endif ++#endif /* WITH_MPI */ ++#endif /* REALCASE */ ++ ++#if COMPLEXCASE == 1 ++#ifdef WITH_MPI ++ if (skewsymmetric) then ++ call p& ++ &BLAS_CHAR& ++ &tranc(int(na,kind=BLAS_KIND), int(na,kind=BLAS_KIND), -ONE, z, 1_BLAS_KIND, 1_BLAS_KIND, sc_desc, & ++ ONE, a, 1_BLAS_KIND, 1_BLAS_KIND, sc_desc) ! A = A + Z**H ++ else ++ call p& ++ &BLAS_CHAR& ++ &tranc(int(na,kind=BLAS_KIND), int(na,kind=BLAS_KIND), ONE, z, 1_BLAS_KIND, 1_BLAS_KIND, sc_desc, & ++ ONE, a, 1_BLAS_KIND, 1_BLAS_KIND, sc_desc) ! 
A = A + Z**H ++ endif ++#else /* WITH_MPI */ ++ if (skewsymmetric) then ++ a = a - transpose(conjg(z)) ++ else ++ a = a + transpose(conjg(z)) ++ endif ++#endif /* WITH_MPI */ ++#endif /* COMPLEXCASE */ ++ ++ ++ if (myid == 0) then ++ print '(a)','| Random matrix block has been symmetrized' ++ endif ++ ++ ! save original matrix A for later accuracy checks ++ ++ as = a ++ ++ deallocate(iseed) ++ ++ end subroutine ++ ++#if REALCASE == 1 ++#ifdef DOUBLE_PRECISION_REAL ++ !c> void prepare_matrix_random_real_double_f(TEST_C_INT_TYPE na, TEST_C_INT_TYPE myid, TEST_C_INT_TYPE na_rows, ++ !c> TEST_C_INT_TYPE na_cols, TEST_C_INT_TYPE sc_desc[9], ++ !c> double *a, double *z, double *as); ++#else ++ !c> void prepare_matrix_random_real_single_f(TEST_C_INT_TYPE na, TEST_C_INT_TYPE myid, TEST_C_INT_TYPE na_rows, ++ !c> TEST_C_INT_TYPE na_cols, TEST_C_INT_TYPE sc_desc[9], ++ !c> float *a, float *z, float *as); ++#endif ++#endif /* REALCASE */ ++ ++#if COMPLEXCASE == 1 ++#ifdef DOUBLE_PRECISION_COMPLEX ++ !c> void prepare_matrix_random_complex_double_f(TEST_C_INT_TYPE na, TEST_C_INT_TYPE myid, TEST_C_INT_TYPE na_rows, ++ !c> TEST_C_INT_TYPE na_cols, TEST_C_INT_TYPE sc_desc[9], ++ !c> complex double *a, complex double *z, complex double *as); ++#else ++ !c> void prepare_matrix_random_complex_single_f(TEST_C_INT_TYPE na, TEST_C_INT_TYPE myid, TEST_C_INT_TYPE na_rows, ++ !c> TEST_C_INT_TYPE na_cols, TEST_C_INT_TYPE sc_desc[9], ++ !c> complex float *a, complex float *z, complex float *as); ++#endif ++#endif /* COMPLEXCASE */ ++ ++subroutine prepare_matrix_random_& ++&MATH_DATATYPE& ++&_wrapper_& ++&PRECISION& ++& (na, myid, na_rows, na_cols, sc_desc, a, z, as) & ++ bind(C, name="prepare_matrix_random_& ++ &MATH_DATATYPE& ++ &_& ++ &PRECISION& ++ &_f") ++ use iso_c_binding ++ ++ implicit none ++#include "./test_precision_kinds.F90" ++ ++ TEST_INT_TYPE , value :: myid, na, na_rows, na_cols ++ TEST_INT_TYPE :: sc_desc(1:9) ++ MATH_DATATYPE(kind=rck) :: z(1:na_rows,1:na_cols), a(1:na_rows,1:na_cols), & ++ as(1:na_rows,1:na_cols) ++ call prepare_matrix_random_& ++ &MATH_DATATYPE& ++ &_& ++ &PRECISION& ++ & (na, myid, sc_desc, a, z, as) ++ end subroutine ++ ++!---------------------------------------------------------------------------------------------------------------- ++ ++ subroutine prepare_matrix_random_spd_& ++ &MATH_DATATYPE& ++ &_& ++ &PRECISION& ++ & (na, myid, sc_desc, a, z, as, nblk, np_rows, np_cols, my_prow, my_pcol) ++ ++ !use test_util ++ use precision_for_tests ++ implicit none ++#include "./test_precision_kinds.F90" ++ TEST_INT_TYPE, intent(in) :: myid, na, sc_desc(:) ++ MATH_DATATYPE(kind=rck), intent(inout) :: z(:,:), a(:,:), as(:,:) ++ TEST_INT_TYPE, intent(in) :: nblk, np_rows, np_cols, my_prow, my_pcol ++ ++ TEST_INT_TYPE :: ii ++ integer(kind=c_int) :: rowLocal, colLocal ++ ++ ++ call prepare_matrix_random_& ++ &MATH_DATATYPE& ++ &_& ++ &PRECISION& ++ & (na, myid, sc_desc, a, z, as) ++ ++ ! 
hermitian diagonaly dominant matrix => positive definite ++ do ii=1, na ++ if (map_global_array_index_to_local_index(int(ii,kind=c_int), int(ii,kind=c_int), & ++ rowLocal, colLocal, & ++ int(nblk,kind=c_int), int(np_rows,kind=c_int), & ++ int(np_cols,kind=c_int), int(my_prow,kind=c_int), & ++ int(my_pcol,kind=c_int) )) then ++ a(int(rowLocal,kind=INT_TYPE),int(colLocal,kind=INT_TYPE)) = & ++ real(a(int(rowLocal,kind=INT_TYPE), int(colLocal,kind=INT_TYPE))) + na + 1 ++ end if ++ end do ++ ++ as = a ++ ++ end subroutine ++ ++#if REALCASE == 1 ++#ifdef DOUBLE_PRECISION_REAL ++ !c> void prepare_matrix_random_spd_real_double_f(TEST_C_INT_TYPE na, TEST_C_INT_TYPE myid, TEST_C_INT_TYPE na_rows, ++ !c> TEST_C_INT_TYPE na_cols, TEST_C_INT_TYPE sc_desc[9], ++ !c> double *a, double *z, double *as, ++ !c> TEST_C_INT_TYPE nblk, TEST_C_INT_TYPE np_rows, TEST_C_INT_TYPE np_cols, ++ !c> TEST_C_INT_TYPE my_prow, TEST_C_INT_TYPE my_pcol); ++#else ++ !c> void prepare_matrix_random_spd_real_single_f(TEST_C_INT_TYPE na, TEST_C_INT_TYPE myid, TEST_C_INT_TYPE na_rows, ++ !c> TEST_C_INT_TYPE na_cols, TEST_C_INT_TYPE sc_desc[9], ++ !c> float *a, float *z, float *as, ++ !c> TEST_C_INT_TYPE nblk, TEST_C_INT_TYPE np_rows, TEST_C_INT_TYPE np_cols, ++ !c> TEST_C_INT_TYPE my_prow, TEST_C_INT_TYPE my_pcol); ++#endif ++#endif /* REALCASE */ ++ ++#if COMPLEXCASE == 1 ++#ifdef DOUBLE_PRECISION_COMPLEX ++ !c> void prepare_matrix_random_spd_complex_double_f(TEST_C_INT_TYPE na, TEST_C_INT_TYPE myid, TEST_C_INT_TYPE na_rows, ++ !c> TEST_C_INT_TYPE na_cols, TEST_C_INT_TYPE sc_desc[9], ++ !c> complex double *a, complex double *z, complex double *as, ++ !c> TEST_C_INT_TYPE nblk, TEST_C_INT_TYPE np_rows, ++ !c> TEST_C_INT_TYPE np_cols, TEST_C_INT_TYPE my_prow, TEST_C_INT_TYPE my_pcol); ++#else ++ !c> void prepare_matrix_random_spd_complex_single_f(TEST_C_INT_TYPE na, TEST_C_INT_TYPE myid, TEST_C_INT_TYPE na_rows, ++ !c> TEST_C_INT_TYPE na_cols, TEST_C_INT_TYPE sc_desc[9], ++ !c> complex float *a, complex float *z, complex float *as, ++ !c> TEST_C_INT_TYPE nblk, TEST_C_INT_TYPE np_rows, ++ !c> TEST_C_INT_TYPE np_cols, TEST_C_INT_TYPE my_prow, TEST_C_INT_TYPE my_pcol); ++#endif ++#endif /* COMPLEXCASE */ ++ ++subroutine prepare_matrix_random_spd_& ++&MATH_DATATYPE& ++&_wrapper_& ++&PRECISION& ++& (na, myid, na_rows, na_cols, sc_desc, a, z, as, nblk, np_rows, np_cols, my_prow, my_pcol) & ++ bind(C, name="prepare_matrix_random_spd_& ++ &MATH_DATATYPE& ++ &_& ++ &PRECISION& ++ &_f") ++ use iso_c_binding ++ ++ implicit none ++#include "./test_precision_kinds.F90" ++ ++ TEST_INT_TYPE , value :: myid, na, na_rows, na_cols ++ TEST_INT_TYPE :: sc_desc(1:9) ++ MATH_DATATYPE(kind=rck) :: z(1:na_rows,1:na_cols), a(1:na_rows,1:na_cols), & ++ as(1:na_rows,1:na_cols) ++ TEST_INT_TYPE , value :: nblk, np_rows, np_cols, my_prow, my_pcol ++ call prepare_matrix_random_spd_& ++ &MATH_DATATYPE& ++ &_& ++ &PRECISION& ++ & (na, myid, sc_desc, a, z, as, nblk, np_rows, np_cols, my_prow, my_pcol) ++ end subroutine ++ ++ ++!---------------------------------------------------------------------------------------------------------------- ++ ++ subroutine prepare_matrix_toeplitz_& ++ &MATH_DATATYPE& ++ &_& ++ &PRECISION& ++ & (na, diagonalElement, subdiagonalElement, d, sd, ds, sds, a, as, & ++ nblk, np_rows, np_cols, my_prow, my_pcol) ++ !use test_util ++ use precision_for_tests ++ implicit none ++#include "./test_precision_kinds.F90" ++ ++ TEST_INT_TYPE, intent(in) :: na, nblk, np_rows, np_cols, my_prow, my_pcol ++ MATH_DATATYPE(kind=rck) :: diagonalElement, 
subdiagonalElement ++ MATH_DATATYPE(kind=rck) :: d(:), sd(:), ds(:), sds(:) ++ MATH_DATATYPE(kind=rck) :: a(:,:), as(:,:) ++ ++ TEST_INT_TYPE :: ii ++ integer(kind=c_int) :: rowLocal, colLocal ++ ++ d(:) = diagonalElement ++ sd(:) = subdiagonalElement ++ a(:,:) = ZERO ++ ++ ! set up the diagonal and subdiagonals (for general solver test) ++ do ii=1, na ! for diagonal elements ++ if (map_global_array_index_to_local_index(int(ii,kind=c_int), int(ii,kind=c_int), rowLocal, & ++ colLocal, int(nblk,kind=c_int), int(np_rows,kind=c_int), & ++ int(np_cols,kind=c_int), int(my_prow,kind=c_int), & ++ int(my_pcol,kind=c_int) ) ) then ++ a(int(rowLocal,kind=INT_TYPE),int(colLocal,kind=INT_TYPE)) = diagonalElement ++ endif ++ enddo ++ do ii=1, na-1 ++ if (map_global_array_index_to_local_index(int(ii,kind=c_int), int(ii+1,kind=c_int), rowLocal, & ++ colLocal, int(nblk,kind=c_int), int(np_rows,kind=c_int), & ++ int(np_cols,kind=c_int), int(my_prow,kind=c_int), & ++ int(my_pcol,kind=c_int) ) ) then ++ a(int(rowLocal,kind=INT_TYPE),int(colLocal,kind=INT_TYPE)) = subdiagonalElement ++ endif ++ enddo ++ ++ do ii=2, na ++ if (map_global_array_index_to_local_index(int(ii,kind=c_int), int(ii-1,kind=c_int), rowLocal, & ++ colLocal, int(nblk,kind=c_int), int(np_rows,kind=c_int), & ++ int(np_cols,kind=c_int), int(my_prow,kind=c_int), & ++ int(my_pcol,kind=c_int) ) ) then ++ a(int(rowLocal,kind=INT_TYPE),int(colLocal,kind=INT_TYPE)) = subdiagonalElement ++ endif ++ enddo ++ ++ ds = d ++ sds = sd ++ as = a ++ end subroutine ++ ++ subroutine prepare_matrix_toeplitz_mixed_complex& ++ &_& ++ &MATH_DATATYPE& ++ &_& ++ &PRECISION& ++#if COMPLEXCASE == 1 ++ & (na, diagonalElement, subdiagonalElement, d, sd, ds, sds, a, as, & ++ nblk, np_rows, np_cols, my_prow, my_pcol) ++#endif ++#if REALCASE == 1 ++ & (na, diagonalElement, subdiagonalElement, d, sd, ds, sds, & ++ nblk, np_rows, np_cols, my_prow, my_pcol) ++#endif ++ !use test_util ++ implicit none ++ ++ TEST_INT_TYPE, intent(in) :: na, nblk, np_rows, np_cols, my_prow, my_pcol ++ real(kind=C_DATATYPE_KIND) :: diagonalElement, subdiagonalElement ++ ++ real(kind=C_DATATYPE_KIND) :: d(:), sd(:), ds(:), sds(:) ++ ++#if COMPLEXCASE == 1 ++ complex(kind=C_DATATYPE_KIND) :: a(:,:), as(:,:) ++#endif ++#if REALCASE == 1 ++#endif ++ ++ TEST_INT_TYPE :: ii ++ integer(kind=c_int) :: rowLocal, colLocal ++#if COMPLEXCASE == 1 ++ d(:) = diagonalElement ++ sd(:) = subdiagonalElement ++ ++ ! set up the diagonal and subdiagonals (for general solver test) ++ do ii=1, na ! 
for diagonal elements ++ if (map_global_array_index_to_local_index(int(ii,kind=c_int), int(ii,kind=c_int), rowLocal, & ++ colLocal, int(nblk,kind=c_int), & ++ int(np_rows,kind=c_int), int(np_cols,kind=c_int), & ++ int(my_prow,kind=c_int), int(my_pcol,kind=c_int) )) then ++ a(int(rowLocal,kind=INT_TYPE),int(colLocal,kind=INT_TYPE)) = diagonalElement ++ endif ++ enddo ++ do ii=1, na-1 ++ if (map_global_array_index_to_local_index(int(ii,kind=c_int), int(ii+1,kind=c_int), rowLocal, & ++ colLocal, int(nblk,kind=c_int), & ++ int(np_rows,kind=c_int), int(np_cols,kind=c_int), & ++ int(my_prow,kind=c_int), int(my_pcol,kind=c_int) )) then ++ a(int(rowLocal,kind=INT_TYPE),int(colLocal,kind=INT_TYPE)) = subdiagonalElement ++ endif ++ enddo ++ ++ do ii=2, na ++ if (map_global_array_index_to_local_index(int(ii,kind=c_int), int(ii-1,kind=c_int), rowLocal, & ++ colLocal, int(nblk,kind=c_int), & ++ int(np_rows,kind=c_int), int(np_cols,kind=c_int), & ++ int(my_prow,kind=c_int), int(my_pcol,kind=c_int) )) then ++ a(int(rowLocal,kind=INT_TYPE),int(colLocal,kind=INT_TYPE)) = subdiagonalElement ++ endif ++ enddo ++ ++ ds = d ++ sds = sd ++ as = a ++#endif ++ end subroutine ++ ++ subroutine prepare_matrix_frank_& ++ &MATH_DATATYPE& ++ &_& ++ &PRECISION& ++ & (na, a, z, as, nblk, np_rows, np_cols, my_prow, my_pcol) ++ !use test_util ++ use precision_for_tests ++ implicit none ++ ++ TEST_INT_TYPE, intent(in) :: na, nblk, np_rows, np_cols, my_prow, my_pcol ++ ++#if REALCASE == 1 ++ real(kind=C_DATATYPE_KIND) :: a(:,:), z(:,:), as(:,:) ++#endif ++#if COMPLEXCASE == 1 ++ complex(kind=C_DATATYPE_KIND) :: a(:,:), z(:,:), as(:,:) ++#endif ++ ++ TEST_INT_TYPE :: i, j ++ integer(kind=c_int) :: rowLocal, colLocal ++ ++ do i = 1, na ++ do j = 1, na ++ if (map_global_array_index_to_local_index(int(i,kind=c_int), int(j,kind=c_int), rowLocal, & ++ colLocal, int(nblk,kind=c_int), & ++ int(np_rows,kind=c_int), int(np_cols,kind=c_int), & ++ int(my_prow,kind=c_int), int(my_pcol,kind=c_int) )) then ++ if (j .le. i) then ++ a(int(rowLocal,kind=INT_TYPE),int(colLocal,kind=INT_TYPE)) = & ++ real((na+1-i), kind=C_DATATYPE_KIND) / real(na, kind=C_DATATYPE_KIND) ++ else ++ a(int(rowLocal,kind=INT_TYPE),int(colLocal,kind=INT_TYPE)) = & ++ real((na+1-j), kind=C_DATATYPE_KIND) / real(na, kind=C_DATATYPE_KIND) ++ endif ++ endif ++ enddo ++ enddo ++ ++ z(:,:) = a(:,:) ++ as(:,:) = a(:,:) ++ ++ end subroutine ++ ++ ++! vim: syntax=fortran +diff -ruN elpa-new_release_2021.11.001/examples/shared/test_read_input_parameters.F90 elpa-new_release_2021.11.001_ok/examples/shared/test_read_input_parameters.F90 +--- elpa-new_release_2021.11.001/examples/shared/test_read_input_parameters.F90 1970-01-01 01:00:00.000000000 +0100 ++++ elpa-new_release_2021.11.001_ok/examples/shared/test_read_input_parameters.F90 2022-01-26 10:10:58.376155000 +0100 +@@ -0,0 +1,455 @@ ++! This file is part of ELPA. ++! ++! The ELPA library was originally created by the ELPA consortium, ++! consisting of the following organizations: ++! ++! - Max Planck Computing and Data Facility (MPCDF), formerly known as ++! Rechenzentrum Garching der Max-Planck-Gesellschaft (RZG), ++! - Bergische Universität Wuppertal, Lehrstuhl für angewandte ++! Informatik, ++! - Technische Universität München, Lehrstuhl für Informatik mit ++! Schwerpunkt Wissenschaftliches Rechnen , ++! - Fritz-Haber-Institut, Berlin, Abt. Theorie, ++! - Max-Plack-Institut für Mathematik in den Naturwissenschaften, ++! Leipzig, Abt. Komplexe Strukutren in Biologie und Kognition, ++! and ++! - IBM Deutschland GmbH ++! 
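! The Frank matrix filled by prepare_matrix_frank above reduces to the closed
! form a(i,j) = (na+1-max(i,j))/na. The following is a minimal serial sketch of
! that fill rule, assuming no MPI/ScaLAPACK block-cyclic mapping; the program
! name frank_demo and the fixed size na=5 are illustrative, not part of the patch.

program frank_demo
  implicit none
  integer, parameter :: dp = kind(1.0d0)
  integer, parameter :: na = 5
  real(dp) :: a(na,na)
  integer :: i, j

  do i = 1, na
    do j = 1, na
      ! same rule as prepare_matrix_frank, with the two branches collapsed via max(i,j)
      a(i,j) = real(na+1-max(i,j), dp) / real(na, dp)
    enddo
  enddo

  ! print the matrix row by row
  do i = 1, na
    print '(*(f8.3))', a(i,:)
  enddo
end program frank_demo

! For na=5 the first printed row is 1.000 0.800 0.600 0.400 0.200,
! matching (na+1-j)/na for i=1.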
++! ++! More information can be found here: ++! http://elpa.mpcdf.mpg.de/ ++! ++! ELPA is free software: you can redistribute it and/or modify ++! it under the terms of the version 3 of the license of the ++! GNU Lesser General Public License as published by the Free ++! Software Foundation. ++! ++! ELPA is distributed in the hope that it will be useful, ++! but WITHOUT ANY WARRANTY; without even the implied warranty of ++! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++! GNU Lesser General Public License for more details. ++! ++! You should have received a copy of the GNU Lesser General Public License ++! along with ELPA. If not, see <http://www.gnu.org/licenses/> ++! ++! ELPA reflects a substantial effort on the part of the original ++! ELPA consortium, and we ask you to respect the spirit of the ++! license that we chose: i.e., please contribute any changes you ++! may have back to the original ELPA library distribution, and keep ++! any derivatives of ELPA under the same license that we chose for ++! the original distribution, the GNU Lesser General Public License. ++! ++! ++#include "config-f90.h" ++ ++ ++#ifdef HAVE_64BIT_INTEGER_MATH_SUPPORT ++#define TEST_INT_TYPE integer(kind=c_int64_t) ++#define INT_TYPE c_int64_t ++#else ++#define TEST_INT_TYPE integer(kind=c_int32_t) ++#define INT_TYPE c_int32_t ++#endif ++ ++module test_read_input_parameters ++ use elpa, only : ELPA_2STAGE_COMPLEX_DEFAULT, ELPA_2STAGE_REAL_DEFAULT, elpa_int_string_to_value, & ++ elpa_int_value_to_string, ELPA_OK ++ use elpa_utilities, only : error_unit ++ use iso_c_binding ++ use test_util, only : x_ao, x_a ++ use test_output_type ++ ++ implicit none ++ ++ type input_options_t ++ TEST_INT_TYPE :: datatype ++ TEST_INT_TYPE :: na, nev, nblk ++ type(output_t) :: write_to_file ++ TEST_INT_TYPE :: this_real_kernel, this_complex_kernel ++ logical :: realKernelIsSet, complexKernelIsSet ++ TEST_INT_TYPE :: useQrIsSet, useGPUIsSet ++ logical :: doSolveTridi, do1stage, do2stage, justHelpMessage, & ++ doCholesky, doInvertTrm, doTransposeMultiply ++ end type ++ ++ interface read_input_parameters ++ module procedure read_input_parameters_general ++ module procedure read_input_parameters_traditional ++ module procedure read_input_parameters_traditional_noskip ++ end interface ++ ++ contains ++ ++ subroutine parse_arguments(command_line_argument, input_options) ++ implicit none ++ ++ type(input_options_t) :: input_options ++ character(len=128) :: command_line_argument ++ integer(kind=c_int) :: elpa_error ++ ++ if (command_line_argument == "--help") then ++ print *,"usage: elpa_tests [--help] [datatype={real|complex}] [na=number] [nev=number] " ++ print *," [nblk=size of block cyclic distribution] [--output_eigenvalues]" ++ print *," [--output_eigenvectors] [--real-kernel=name_of_kernel]" ++ print *," [--complex-kernel=name_of_kernel] [--use-gpu={0|1}]" ++ print *," [--use-qr={0,1}] [--tests={all|solve-tridi|1stage|2stage|cholesky& ++ &|invert-triangular|transpose-mulitply}]" ++ input_options%justHelpMessage=.true. ++ return ++ endif ++ ++ ++ if (command_line_argument(1:11) == "--datatype=") then ++ if (command_line_argument(12:15) == "real") then ++ input_options%datatype=1 ++ else ++ if (command_line_argument(12:18) == "complex") then ++ input_options%datatype=2 ++ else ++ print *,"datatype unknown! 
use either --datatype=real or --datatype=complex"
++             stop 1
++           endif
++         endif
++       endif
++
++       if (command_line_argument(1:3) == "na=") then
++         read(command_line_argument(4:), *) input_options%na
++       endif
++       if (command_line_argument(1:4) == "nev=") then
++         read(command_line_argument(5:), *) input_options%nev
++       endif
++       if (command_line_argument(1:5) == "nblk=") then
++         read(command_line_argument(6:), *) input_options%nblk
++       endif
++
++       if (command_line_argument(1:21) == "--output_eigenvectors") then
++         input_options%write_to_file%eigenvectors = .true.
++       endif
++
++       if (command_line_argument(1:20) == "--output_eigenvalues") then
++         input_options%write_to_file%eigenvalues = .true.
++       endif
++
++       if (command_line_argument(1:14) == "--real-kernel=") then
++         input_options%this_real_kernel = int(elpa_int_string_to_value("real_kernel", &
++                                              command_line_argument(15:), elpa_error), &
++                                              kind=INT_TYPE)
++         if (elpa_error /= ELPA_OK) then
++           print *, "Invalid argument for --real-kernel"
++           stop 1
++         endif
++         print *,"Setting ELPA2 real kernel to ", elpa_int_value_to_string("real_kernel", &
++                                                  int(input_options%this_real_kernel,kind=c_int))
++         input_options%realKernelIsSet = .true.
++       endif
++
++       if (command_line_argument(1:17) == "--complex-kernel=") then
++         input_options%this_complex_kernel = int(elpa_int_string_to_value("complex_kernel", &
++                                                 command_line_argument(18:), elpa_error), kind=INT_TYPE)
++         if (elpa_error /= ELPA_OK) then
++           print *, "Invalid argument for --complex-kernel"
++           stop 1
++         endif
++         print *,"Setting ELPA2 complex kernel to ", elpa_int_value_to_string("complex_kernel", &
++                                                     int(input_options%this_complex_kernel,kind=c_int))
++         input_options%complexKernelIsSet = .true.
++       endif
++
++       if (command_line_argument(1:9) == "--use-qr=") then
++         read(command_line_argument(10:), *) input_options%useQrIsSet
++       endif
++
++       if (command_line_argument(1:10) == "--use-gpu=") then
++         read(command_line_argument(11:), *) input_options%useGPUIsSet
++       endif
++
++       if (command_line_argument(1:8) == "--tests=") then
++         if (command_line_argument(9:11) == "all") then
++           input_options%doSolveTridi=.true.
++           input_options%do1stage=.true.
++           input_options%do2stage=.true.
++           input_options%doCholesky=.true.
++           input_options%doInvertTrm=.true.
++           input_options%doTransposeMultiply=.true.
++         else if (command_line_argument(9:19) == "solve-tridi") then
++           input_options%doSolveTridi=.true.
++           input_options%do1stage=.false.
++           input_options%do2stage=.false.
++           input_options%doCholesky=.false.
++           input_options%doInvertTrm=.false.
++           input_options%doTransposeMultiply=.false.
++         else if (command_line_argument(9:14) == "1stage") then
++           input_options%doSolveTridi=.false.
++           input_options%do1stage=.true.
++           input_options%do2stage=.false.
++           input_options%doCholesky=.false.
++           input_options%doInvertTrm=.false.
++           input_options%doTransposeMultiply=.false.
++         else if (command_line_argument(9:14) == "2stage") then
++           input_options%doSolveTridi=.false.
++           input_options%do1stage=.false.
++           input_options%do2stage=.true.
++           input_options%doCholesky=.false.
++           input_options%doInvertTrm=.false.
++           input_options%doTransposeMultiply=.false.
++         else if (command_line_argument(9:16) == "cholesky") then
++           input_options%doSolveTridi=.false.
++           input_options%do1stage=.false.
++           input_options%do2stage=.false.
++           input_options%doCholesky=.true.
++           input_options%doInvertTrm=.false.
++           input_options%doTransposeMultiply=.false.
++         else if (command_line_argument(9:25) == "invert-triangular") then
++           input_options%doSolveTridi=.false.
++           input_options%do1stage=.false.
++           input_options%do2stage=.false.
++           input_options%doCholesky=.false.
++           input_options%doInvertTrm=.true.
++           input_options%doTransposeMultiply=.false.
++         else if (command_line_argument(9:26) == "transpose-multiply") then
++           input_options%doSolveTridi=.false.
++           input_options%do1stage=.false.
++           input_options%do2stage=.false.
++           input_options%doCholesky=.false.
++           input_options%doInvertTrm=.false.
++           input_options%doTransposeMultiply=.true.
++         else
++           print *,"unknown test specified"
++           stop 1
++         endif
++       endif
++
++     end subroutine
++
++     subroutine read_input_parameters_general(input_options)
++       use precision_for_tests
++       implicit none
++
++       type(input_options_t) :: input_options
++
++       ! Command line arguments
++       character(len=128) :: arg1, arg2, arg3, arg4, arg5, arg6, arg7, arg8, arg9, arg10
++
++       ! default parameters
++       input_options%datatype = 1
++       input_options%na = 500
++       input_options%nev = 150
++       input_options%nblk = 16
++
++       input_options%write_to_file%eigenvectors = .false.
++       input_options%write_to_file%eigenvalues = .false.
++
++       input_options%this_real_kernel = ELPA_2STAGE_REAL_DEFAULT
++       input_options%this_complex_kernel = ELPA_2STAGE_COMPLEX_DEFAULT
++       input_options%realKernelIsSet = .false.
++       input_options%complexKernelIsSet = .false.
++
++       input_options%useQrIsSet = 0
++
++       input_options%useGPUIsSet = 0
++
++       input_options%do1Stage = .true.
++       input_options%do2Stage = .true.
++       input_options%doSolveTridi = .true.
++       input_options%doCholesky=.true.
++       input_options%doInvertTrm=.true.
++       input_options%doTransposeMultiply=.true.
++       input_options%justHelpMessage=.false.
++
++       ! test na=1500 nev=50 nblk=16 --help --kernel --output_eigenvectors --output_eigenvalues
++       if (COMMAND_ARGUMENT_COUNT() .gt. 8) then
++         write(error_unit, '(a,i0,a)') "Invalid number (", COMMAND_ARGUMENT_COUNT(), ") of command line arguments!"
++         stop 1
++       endif
++
++       if (COMMAND_ARGUMENT_COUNT() .gt. 0) then
++
++         call get_COMMAND_ARGUMENT(1, arg1)
++
++         call parse_arguments(arg1, input_options)
++
++         if (COMMAND_ARGUMENT_COUNT() .ge. 2) then
++           ! argument 2
++           call get_COMMAND_ARGUMENT(2, arg2)
++
++           call parse_arguments(arg2, input_options)
++         endif
++
++         ! argument 3
++         if (COMMAND_ARGUMENT_COUNT() .ge. 3) then
++
++           call get_COMMAND_ARGUMENT(3, arg3)
++
++           call parse_arguments(arg3, input_options)
++         endif
++
++         ! argument 4
++         if (COMMAND_ARGUMENT_COUNT() .ge. 4) then
++
++           call get_COMMAND_ARGUMENT(4, arg4)
++
++           call parse_arguments(arg4, input_options)
++
++         endif
++
++         ! argument 5
++         if (COMMAND_ARGUMENT_COUNT() .ge. 5) then
++
++           call get_COMMAND_ARGUMENT(5, arg5)
++
++           call parse_arguments(arg5, input_options)
++         endif
++
++         ! argument 6
++         if (COMMAND_ARGUMENT_COUNT() .ge. 6) then
++
++           call get_COMMAND_ARGUMENT(6, arg6)
++
++           call parse_arguments(arg6, input_options)
++         endif
++
++         ! argument 7
++         if (COMMAND_ARGUMENT_COUNT() .ge. 7) then
++
++           call get_COMMAND_ARGUMENT(7, arg7)
++
++           call parse_arguments(arg7, input_options)
++
++         endif
++
++         ! argument 8
++         if (COMMAND_ARGUMENT_COUNT() .ge. 8) then
++
++           call get_COMMAND_ARGUMENT(8, arg8)
++
++           call parse_arguments(arg8, input_options)
++
++         endif
++
++         ! argument 9
++         if (COMMAND_ARGUMENT_COUNT() .ge. 9) then
++
++           call get_COMMAND_ARGUMENT(9, arg9)
++
++           call parse_arguments(arg9, input_options)
++
++         endif
++
++         ! argument 10
++         if (COMMAND_ARGUMENT_COUNT() .ge. 10) then
++
++           call get_COMMAND_ARGUMENT(10, arg10)
++
++           call parse_arguments(arg10, input_options)
++
++         endif
++
++       endif
++
++       if (input_options%useQrIsSet .eq. 1 .and. input_options%datatype .eq. 2) then
++         print *,"You cannot use QR-decomposition in complex case"
++         stop 1
++       endif
++
++     end subroutine
++
++     subroutine read_input_parameters_traditional_noskip(na, nev, nblk, write_to_file)
++       use precision_for_tests
++       implicit none
++
++       TEST_INT_TYPE, intent(out) :: na, nev, nblk
++
++       type(output_t), intent(out) :: write_to_file
++       logical :: skip_check_correctness
++
++       call read_input_parameters_traditional(na, nev, nblk, write_to_file, skip_check_correctness)
++     end subroutine
++
++     subroutine read_input_parameters_traditional(na, nev, nblk, write_to_file, skip_check_correctness)
++       use precision_for_tests
++       implicit none
++
++       TEST_INT_TYPE, intent(out) :: na, nev, nblk
++
++       type(output_t), intent(out) :: write_to_file
++       logical, intent(out) :: skip_check_correctness
++
++       ! Command line arguments
++       character(len=128) :: arg1, arg2, arg3, arg4, arg5
++
++       ! default parameters
++       na = 5000
++       nev = 150
++       nblk = 16
++       write_to_file%eigenvectors = .false.
++       write_to_file%eigenvalues = .false.
++       skip_check_correctness = .false.
++
++       if (.not. any(COMMAND_ARGUMENT_COUNT() == [0, 3, 4, 5])) then
++         write(error_unit, '(a,i0,a)') "Invalid number (", COMMAND_ARGUMENT_COUNT(), ") of command line arguments!"
++         write(error_unit, *) "Expected: program [ [matrix_size num_eigenvalues block_size] &
++             ""output_eigenvalues"" ""output_eigenvectors""]"
++         stop 1
++       endif
++
++       if (COMMAND_ARGUMENT_COUNT() == 3) then
++         call GET_COMMAND_ARGUMENT(1, arg1)
++         call GET_COMMAND_ARGUMENT(2, arg2)
++         call GET_COMMAND_ARGUMENT(3, arg3)
++
++         read(arg1, *) na
++         read(arg2, *) nev
++         read(arg3, *) nblk
++       endif
++
++       if (COMMAND_ARGUMENT_COUNT() == 4) then
++         call GET_COMMAND_ARGUMENT(1, arg1)
++         call GET_COMMAND_ARGUMENT(2, arg2)
++         call GET_COMMAND_ARGUMENT(3, arg3)
++         call GET_COMMAND_ARGUMENT(4, arg4)
++         read(arg1, *) na
++         read(arg2, *) nev
++         read(arg3, *) nblk
++
++         if (arg4 .eq. "output_eigenvalues") then
++           write_to_file%eigenvalues = .true.
++         elseif (arg4 .eq. "skip_check_correctness") then
++           skip_check_correctness = .true.
++         else
++           write(error_unit, *) &
++               "Invalid value for parameter 4. Must be ""output_eigenvalues"", ""skip_check_correctness"" or omitted"
++           stop 1
++         endif
++
++       endif
++
++       if (COMMAND_ARGUMENT_COUNT() == 5) then
++         call GET_COMMAND_ARGUMENT(1, arg1)
++         call GET_COMMAND_ARGUMENT(2, arg2)
++         call GET_COMMAND_ARGUMENT(3, arg3)
++         call GET_COMMAND_ARGUMENT(4, arg4)
++         call GET_COMMAND_ARGUMENT(5, arg5)
++         read(arg1, *) na
++         read(arg2, *) nev
++         read(arg3, *) nblk
++
++         if (arg4 .eq. "output_eigenvalues") then
++           write_to_file%eigenvalues = .true.
++         else
++           write(error_unit, *) "Invalid value for output flag! Must be ""output_eigenvalues"" or omitted"
++           stop 1
++         endif
++
++         if (arg5 .eq. "output_eigenvectors") then
++           write_to_file%eigenvectors = .true.
++         else
++           write(error_unit, *) "Invalid value for output flag!
Must be ""output_eigenvectors"" or omitted" ++ stop 1 ++ endif ++ ++ endif ++ end subroutine ++ ++end module +diff -ruN elpa-new_release_2021.11.001/examples/shared/test_redir.c elpa-new_release_2021.11.001_ok/examples/shared/test_redir.c +--- elpa-new_release_2021.11.001/examples/shared/test_redir.c 1970-01-01 01:00:00.000000000 +0100 ++++ elpa-new_release_2021.11.001_ok/examples/shared/test_redir.c 2022-01-26 10:10:58.378020000 +0100 +@@ -0,0 +1,125 @@ ++// This file is part of ELPA. ++// ++// The ELPA library was originally created by the ELPA consortium, ++// consisting of the following organizations: ++// ++// - Max Planck Computing and Data Facility (MPCDF), formerly known as ++// Rechenzentrum Garching der Max-Planck-Gesellschaft (RZG), ++// - Bergische Universität Wuppertal, Lehrstuhl für angewandte ++// Informatik, ++// - Technische Universität München, Lehrstuhl für Informatik mit ++// Schwerpunkt Wissenschaftliches Rechnen , ++// - Fritz-Haber-Institut, Berlin, Abt. Theorie, ++// - Max-Plack-Institut für Mathematik in den Naturwissenschaften, ++// Leipzig, Abt. Komplexe Strukutren in Biologie und Kognition, ++// and ++// - IBM Deutschland GmbH ++// ++// ++// More information can be found here: ++// http://elpa.mpcdf.mpg.de/ ++// ++// ELPA is free software: you can redistribute it and/or modify ++// it under the terms of the version 3 of the license of the ++// GNU Lesser General Public License as published by the Free ++// Software Foundation. ++// ++// ELPA is distributed in the hope that it will be useful, ++// but WITHOUT ANY WARRANTY; without even the implied warranty of ++// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++// GNU Lesser General Public License for more details. ++// ++// You should have received a copy of the GNU Lesser General Public License ++// along with ELPA. If not, see <http://www.gnu.org/licenses/> ++// ++// ELPA reflects a substantial effort on the part of the original ++// ELPA consortium, and we ask you to respect the spirit of the ++// license that we chose: i.e., please contribute any changes you ++// may have back to the original ELPA library distribution, and keep ++// any derivatives of ELPA under the same license that we chose for ++// the original distribution, the GNU Lesser General Public License. 
++// ++// ++// -------------------------------------------------------------------------------------------------- ++#include <stdio.h> ++#include <fcntl.h> ++#include <stdlib.h> ++#include <unistd.h> ++#include <sys/stat.h> ++#include <sys/types.h> ++#include <unistd.h> ++#include <errno.h> ++ ++#define NAME_LENGTH 4096 ++#define FILENAME "./mpi_stdout/std%3s_rank%04d.txt" ++ ++FILE *tout, *terr; ++void dup_filename(char *filename, int dupfd); ++void dup_fd(int fd, int dupfd); ++ ++int _mkdirifnotexists(const char *dir) { ++ struct stat s; ++ if (stat(dir, &s) != 0) { ++ if (errno == ENOENT) { ++ if (mkdir(dir, 0755) != 0) { ++ perror("mkdir"); ++ return 0; ++ } else { ++ return 1; ++ } ++ } else { ++ perror("stat()"); ++ return 0; ++ } ++ } else if (!S_ISDIR(s.st_mode)) { ++ fprintf(stderr, "\"%s\" does exist and is not a directory\n", dir); ++ return 0; ++ } else { ++ return 1; ++ } ++} ++ ++int create_directories(void) { ++ if (!_mkdirifnotexists("mpi_stdout")) return 0; ++ return 1; ++} ++ ++void redirect_stdout(int *myproc) { ++ char buf[NAME_LENGTH]; ++ ++ if (*myproc == 0) { ++ snprintf(buf, NAME_LENGTH, "tee " FILENAME, "out", *myproc); ++ tout = popen(buf, "w"); ++ dup_fd(fileno(tout), 1); ++ ++ snprintf(buf, NAME_LENGTH, "tee " FILENAME, "err", *myproc); ++ terr = popen(buf, "w"); ++ dup_fd(fileno(terr), 2); ++ } else { ++ snprintf(buf, NAME_LENGTH, FILENAME, "out", *myproc); ++ dup_filename(buf, 1); ++ ++ snprintf(buf, NAME_LENGTH, FILENAME, "err", *myproc); ++ dup_filename(buf, 2); ++ } ++ ++ return; ++} ++ ++/* Redirect file descriptor dupfd to file filename */ ++void dup_filename(char *filename, int dupfd) { ++ int fd = open(filename, O_WRONLY | O_CREAT | O_TRUNC, 0644); ++ if(fd < 0) { ++ perror("open()"); ++ exit(1); ++ } ++ dup_fd(fd, dupfd); ++} ++ ++/* Redirect file descriptor dupfd to file descriptor fd */ ++void dup_fd(int fd, int dupfd) { ++ if(dup2(fd,dupfd) < 0) { ++ perror("dup2()"); ++ exit(1); ++ } ++} +diff -ruN elpa-new_release_2021.11.001/examples/shared/test_redirect.F90 elpa-new_release_2021.11.001_ok/examples/shared/test_redirect.F90 +--- elpa-new_release_2021.11.001/examples/shared/test_redirect.F90 1970-01-01 01:00:00.000000000 +0100 ++++ elpa-new_release_2021.11.001_ok/examples/shared/test_redirect.F90 2022-01-26 10:10:58.379123000 +0100 +@@ -0,0 +1,116 @@ ++! This file is part of ELPA. ++! ++! The ELPA library was originally created by the ELPA consortium, ++! consisting of the following organizations: ++! ++! - Max Planck Computing and Data Facility (MPCDF), formerly known as ++! Rechenzentrum Garching der Max-Planck-Gesellschaft (RZG), ++! - Bergische Universität Wuppertal, Lehrstuhl für angewandte ++! Informatik, ++! - Technische Universität München, Lehrstuhl für Informatik mit ++! Schwerpunkt Wissenschaftliches Rechnen , ++! - Fritz-Haber-Institut, Berlin, Abt. Theorie, ++! - Max-Plack-Institut für Mathematik in den Naturwissenschaften, ++! Leipzig, Abt. Komplexe Strukutren in Biologie und Kognition, ++! and ++! - IBM Deutschland GmbH ++! ++! ++! More information can be found here: ++! http://elpa.mpcdf.mpg.de/ ++! ++! ELPA is free software: you can redistribute it and/or modify ++! it under the terms of the version 3 of the license of the ++! GNU Lesser General Public License as published by the Free ++! Software Foundation. ++! ++! ELPA is distributed in the hope that it will be useful, ++! but WITHOUT ANY WARRANTY; without even the implied warranty of ++! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++! 
GNU Lesser General Public License for more details. ++! ++! You should have received a copy of the GNU Lesser General Public License ++! along with ELPA. If not, see <http://www.gnu.org/licenses/> ++! ++! ELPA reflects a substantial effort on the part of the original ++! ELPA consortium, and we ask you to respect the spirit of the ++! license that we chose: i.e., please contribute any changes you ++! may have back to the original ELPA library distribution, and keep ++! any derivatives of ELPA under the same license that we chose for ++! the original distribution, the GNU Lesser General Public License. ++! ++! ++#include "config-f90.h" ++ ++module test_redirect ++ use, intrinsic :: iso_c_binding ++ ++ implicit none ++ public ++ ++ logical :: use_redirect_stdout ++ ++ interface ++ integer(kind=C_INT) function create_directories_c() bind(C, name="create_directories") ++ use, intrinsic :: iso_c_binding ++ implicit none ++ end function ++ end interface ++ ++ interface ++ subroutine redirect_stdout_c(myproc) bind(C, name="redirect_stdout") ++ use, intrinsic :: iso_c_binding ++ implicit none ++ integer(kind=C_INT), intent(in) :: myproc ++ end subroutine ++ end interface ++ ++ contains ++!> ++!> This function is the Fortran driver for the ++!> C program to create the redirect output ++!> directory ++!> ++!> \param none ++!> \result res integer indicates success or failure ++ function create_directories() result(res) ++ implicit none ++ integer(kind=C_INT) :: res ++ res = int(create_directories_c()) ++ end function ++!> ++!> This subroutine is the Fortran driver for the ++!> redirection of stdout and stderr of each MPI ++!> task ++!> ++!> \param myproc MPI task id ++ subroutine redirect_stdout(myproc) ++ use, intrinsic :: iso_c_binding ++ implicit none ++ integer(kind=C_INT), intent(in) :: myproc ++ call redirect_stdout_c(int(myproc, kind=C_INT)) ++ end subroutine ++!> ++!> This function checks, whether the environment variable ++!> "REDIRECT_ELPA_TEST_OUTPUT" is set to "true". ++!> Returns ".true." if variable is set, otherwise ".false." ++!> This function only works if the during the build process ++!> "HAVE_ENVIRONMENT_CHECKING" was tested successfully ++!> ++!> \param none ++!> \return logical ++ function check_redirect_environment_variable() result(redirect) ++ implicit none ++ logical :: redirect ++ character(len=255) :: REDIRECT_VARIABLE ++ ++ redirect = .false. ++ ++#if defined(HAVE_ENVIRONMENT_CHECKING) ++ call get_environment_variable("REDIRECT_ELPA_TEST_OUTPUT",REDIRECT_VARIABLE) ++#endif ++ if (trim(REDIRECT_VARIABLE) .eq. "true") redirect = .true. ++ ++ end function ++ ++end module test_redirect +diff -ruN elpa-new_release_2021.11.001/examples/shared/test_scalapack.F90 elpa-new_release_2021.11.001_ok/examples/shared/test_scalapack.F90 +--- elpa-new_release_2021.11.001/examples/shared/test_scalapack.F90 1970-01-01 01:00:00.000000000 +0100 ++++ elpa-new_release_2021.11.001_ok/examples/shared/test_scalapack.F90 2022-01-26 10:10:58.380143000 +0100 +@@ -0,0 +1,111 @@ ++! (c) Copyright Pavel Kus, 2017, MPCDF ++! ++! This file is part of ELPA. ++! ++! The ELPA library was originally created by the ELPA consortium, ++! consisting of the following organizations: ++! ++! - Max Planck Computing and Data Facility (MPCDF), formerly known as ++! Rechenzentrum Garching der Max-Planck-Gesellschaft (RZG), ++! - Bergische Universität Wuppertal, Lehrstuhl für angewandte ++! Informatik, ++! - Technische Universität München, Lehrstuhl für Informatik mit ++! Schwerpunkt Wissenschaftliches Rechnen , ++! 
- Fritz-Haber-Institut, Berlin, Abt. Theorie, ++! - Max-Plack-Institut für Mathematik in den Naturwissenschaften, ++! Leipzig, Abt. Komplexe Strukutren in Biologie und Kognition, ++! and ++! - IBM Deutschland GmbH ++! ++! ++! More information can be found here: ++! http://elpa.mpcdf.mpg.de/ ++! ++! ELPA is free software: you can redistribute it and/or modify ++! it under the terms of the version 3 of the license of the ++! GNU Lesser General Public License as published by the Free ++! Software Foundation. ++! ++! ELPA is distributed in the hope that it will be useful, ++! but WITHOUT ANY WARRANTY; without even the implied warranty of ++! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++! GNU Lesser General Public License for more details. ++! ++! You should have received a copy of the GNU Lesser General Public License ++! along with ELPA. If not, see <http://www.gnu.org/licenses/> ++! ++! ELPA reflects a substantial effort on the part of the original ++! ELPA consortium, and we ask you to respect the spirit of the ++! license that we chose: i.e., please contribute any changes you ++! may have back to the original ELPA library distribution, and keep ++! any derivatives of ELPA under the same license that we chose for ++! the original distribution, the GNU Lesser General Public License. ++ ++#include "../Fortran/assert.h" ++#include "config-f90.h" ++ ++module test_scalapack ++ use test_util ++ ++ interface solve_scalapack_all ++ module procedure solve_pdsyevd ++ module procedure solve_pzheevd ++#ifdef WANT_SINGLE_PRECISION_REAL ++ module procedure solve_pssyevd ++#endif ++#ifdef WANT_SINGLE_PRECISION_COMPLEX ++ module procedure solve_pcheevd ++#endif ++ end interface ++ ++ interface solve_scalapack_part ++ module procedure solve_pdsyevr ++ module procedure solve_pzheevr ++#ifdef WANT_SINGLE_PRECISION_REAL ++ module procedure solve_pssyevr ++#endif ++#ifdef WANT_SINGLE_PRECISION_COMPLEX ++ module procedure solve_pcheevr ++#endif ++ end interface ++ ++contains ++ ++#define COMPLEXCASE 1 ++#define DOUBLE_PRECISION 1 ++#include "../../src/general/precision_macros.h" ++#include "test_scalapack_template.F90" ++#undef DOUBLE_PRECISION ++#undef COMPLEXCASE ++ ++#ifdef WANT_SINGLE_PRECISION_COMPLEX ++ ++#define COMPLEXCASE 1 ++#define SINGLE_PRECISION 1 ++#include "../../src/general/precision_macros.h" ++#include "test_scalapack_template.F90" ++#undef SINGLE_PRECISION ++#undef COMPLEXCASE ++ ++#endif /* WANT_SINGLE_PRECISION_COMPLEX */ ++ ++#define REALCASE 1 ++#define DOUBLE_PRECISION 1 ++#include "../../src/general/precision_macros.h" ++#include "test_scalapack_template.F90" ++#undef DOUBLE_PRECISION ++#undef REALCASE ++ ++#ifdef WANT_SINGLE_PRECISION_REAL ++ ++#define REALCASE 1 ++#define SINGLE_PRECISION 1 ++#include "../../src/general/precision_macros.h" ++#include "test_scalapack_template.F90" ++#undef SINGLE_PRECISION ++#undef REALCASE ++ ++#endif /* WANT_SINGLE_PRECISION_REAL */ ++ ++ ++end module +diff -ruN elpa-new_release_2021.11.001/examples/shared/test_scalapack_template.F90 elpa-new_release_2021.11.001_ok/examples/shared/test_scalapack_template.F90 +--- elpa-new_release_2021.11.001/examples/shared/test_scalapack_template.F90 1970-01-01 01:00:00.000000000 +0100 ++++ elpa-new_release_2021.11.001_ok/examples/shared/test_scalapack_template.F90 2022-01-26 10:10:58.381125000 +0100 +@@ -0,0 +1,161 @@ ++! (c) Copyright Pavel Kus, 2017, MPCDF ++! ++! This file is part of ELPA. ++! ++! The ELPA library was originally created by the ELPA consortium, ++! 
+diff -ruN elpa-new_release_2021.11.001/examples/shared/test_scalapack_template.F90 elpa-new_release_2021.11.001_ok/examples/shared/test_scalapack_template.F90
+--- elpa-new_release_2021.11.001/examples/shared/test_scalapack_template.F90 1970-01-01 01:00:00.000000000 +0100
++++ elpa-new_release_2021.11.001_ok/examples/shared/test_scalapack_template.F90 2022-01-26 10:10:58.381125000 +0100
+@@ -0,0 +1,161 @@
++! (c) Copyright Pavel Kus, 2017, MPCDF
++!
++! This file is part of ELPA.
++!
++! The ELPA library was originally created by the ELPA consortium,
++! consisting of the following organizations:
++!
++! - Max Planck Computing and Data Facility (MPCDF), formerly known as
++!   Rechenzentrum Garching der Max-Planck-Gesellschaft (RZG),
++! - Bergische Universität Wuppertal, Lehrstuhl für angewandte
++!   Informatik,
++! - Technische Universität München, Lehrstuhl für Informatik mit
++!   Schwerpunkt Wissenschaftliches Rechnen,
++! - Fritz-Haber-Institut, Berlin, Abt. Theorie,
++! - Max-Planck-Institut für Mathematik in den Naturwissenschaften,
++!   Leipzig, Abt. Komplexe Strukturen in Biologie und Kognition,
++!   and
++! - IBM Deutschland GmbH
++!
++!
++! More information can be found here:
++! http://elpa.mpcdf.mpg.de/
++!
++! ELPA is free software: you can redistribute it and/or modify
++! it under the terms of the version 3 of the license of the
++! GNU Lesser General Public License as published by the Free
++! Software Foundation.
++!
++! ELPA is distributed in the hope that it will be useful,
++! but WITHOUT ANY WARRANTY; without even the implied warranty of
++! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
++! GNU Lesser General Public License for more details.
++!
++! You should have received a copy of the GNU Lesser General Public License
++! along with ELPA. If not, see <http://www.gnu.org/licenses/>
++!
++! ELPA reflects a substantial effort on the part of the original
++! ELPA consortium, and we ask you to respect the spirit of the
++! license that we chose: i.e., please contribute any changes you
++! may have back to the original ELPA library distribution, and keep
++! any derivatives of ELPA under the same license that we chose for
++! the original distribution, the GNU Lesser General Public License.
++
++  ! compute all eigenvectors
++  subroutine solve_p&
++              &BLAS_CHAR_AND_SY_OR_HE&
++              &evd(na, a, sc_desc, ev, z)
++    implicit none
++#include "./test_precision_kinds.F90"
++    integer(kind=ik), intent(in)           :: na
++    MATH_DATATYPE(kind=rck), intent(in)    :: a(:,:)
++    MATH_DATATYPE(kind=rck), intent(inout) :: z(:,:)
++    real(kind=rk), intent(inout)           :: ev(:)
++    integer(kind=ik), intent(in)           :: sc_desc(:)
++    integer(kind=ik)                       :: info, lwork, liwork, lrwork
++    MATH_DATATYPE(kind=rck), allocatable   :: work(:)
++    real(kind=rk), allocatable             :: rwork(:)
++    integer, allocatable                   :: iwork(:)
++
++    allocate(work(1), iwork(1), rwork(1))
++
++    ! query for required workspace
++#ifdef REALCASE
++    call p&
++        &BLAS_CHAR&
++        &syevd('V', 'U', na, a, 1, 1, sc_desc, ev, z, 1, 1, sc_desc, work, -1, iwork, -1, info)
++#endif
++#ifdef COMPLEXCASE
++    call p&
++        &BLAS_CHAR&
++        &heevd('V', 'U', na, a, 1, 1, sc_desc, ev, z, 1, 1, sc_desc, work, -1, rwork, -1, iwork, -1, info)
++#endif
++    ! write(*,*) "computed sizes", lwork, liwork, "required sizes ", work(1), iwork(1)
++    lwork = work(1)
++    liwork = iwork(1)
++    deallocate(work, iwork)
++    allocate(work(lwork), stat = info)
++    allocate(iwork(liwork), stat = info)
++#ifdef COMPLEXCASE
++    lrwork = rwork(1)
++    deallocate(rwork)
++    allocate(rwork(lrwork), stat = info)
++#endif
++    ! the actual call to the method
++#ifdef REALCASE
++    call p&
++        &BLAS_CHAR&
++        &syevd('V', 'U', na, a, 1, 1, sc_desc, ev, z, 1, 1, sc_desc, work, lwork, iwork, liwork, info)
++#endif
++#ifdef COMPLEXCASE
++    call p&
++        &BLAS_CHAR&
++        &heevd('V', 'U', na, a, 1, 1, sc_desc, ev, z, 1, 1, sc_desc, work, lwork, rwork, lrwork, iwork, liwork, info)
++#endif
++
++    deallocate(iwork, work, rwork)
++  end subroutine
++
++
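++  ! Editorial note: the calls above use the standard (Sca)LAPACK
++  ! two-phase workspace query. For the double-precision real case,
++  ! for instance, the template expands to:
++  !
++  !   call pdsyevd('V', 'U', na, a, 1, 1, sc_desc, ev, z, 1, 1, sc_desc, &
++  !                work, -1, iwork, -1, info)   ! lwork = -1: query only
++  !   lwork  = work(1)                          ! required sizes come back
++  !   liwork = iwork(1)                         ! in the first slots
++  !   ! ... reallocate work/iwork, then repeat the call with the real sizes
++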
++  ! compute part of eigenvectors
++  subroutine solve_p&
++              &BLAS_CHAR_AND_SY_OR_HE&
++              &evr(na, a, sc_desc, nev, ev, z)
++    implicit none
++#include "./test_precision_kinds.F90"
++    integer(kind=ik), intent(in)           :: na, nev
++    MATH_DATATYPE(kind=rck), intent(in)    :: a(:,:)
++    MATH_DATATYPE(kind=rck), intent(inout) :: z(:,:)
++    real(kind=rk), intent(inout)           :: ev(:)
++    integer(kind=ik), intent(in)           :: sc_desc(:)
++    integer(kind=ik)                       :: info, lwork, liwork, lrwork
++    MATH_DATATYPE(kind=rck), allocatable   :: work(:)
++    real(kind=rk), allocatable             :: rwork(:)
++    integer, allocatable                   :: iwork(:)
++    integer(kind=ik)                       :: comp_eigenval, comp_eigenvec, smallest_ev_idx, largest_ev_idx
++
++    allocate(work(1), iwork(1), rwork(1))
++    smallest_ev_idx = 1
++    largest_ev_idx = nev
++    ! query for required workspace
++#ifdef REALCASE
++    call p&
++        &BLAS_CHAR&
++        &syevr('V', 'I', 'U', na, a, 1, 1, sc_desc, 0.0_rk, 0.0_rk, smallest_ev_idx, largest_ev_idx, &
++               comp_eigenval, comp_eigenvec, ev, z, 1, 1, sc_desc, work, -1, iwork, -1, info)
++#endif
++#ifdef COMPLEXCASE
++    call p&
++        &BLAS_CHAR&
++        &heevr('V', 'I', 'U', na, a, 1, 1, sc_desc, 0.0_rk, 0.0_rk, smallest_ev_idx, largest_ev_idx, &
++               comp_eigenval, comp_eigenvec, ev, z, 1, 1, sc_desc, work, -1, rwork, -1, iwork, -1, info)
++#endif
++    ! write(*,*) "computed sizes", lwork, liwork, "required sizes ", work(1), iwork(1)
++    lwork = work(1)
++    liwork = iwork(1)
++    deallocate(work, iwork)
++    allocate(work(lwork), stat = info)
++    allocate(iwork(liwork), stat = info)
++#ifdef COMPLEXCASE
++    lrwork = rwork(1)
++    deallocate(rwork)
++    allocate(rwork(lrwork), stat = info)
++#endif
++    ! the actual call to the method
++#ifdef REALCASE
++    call p&
++        &BLAS_CHAR&
++        &syevr('V', 'I', 'U', na, a, 1, 1, sc_desc, 0.0_rk, 0.0_rk, smallest_ev_idx, largest_ev_idx, &
++               comp_eigenval, comp_eigenvec, ev, z, 1, 1, sc_desc, work, lwork, iwork, liwork, info)
++#endif
++#ifdef COMPLEXCASE
++    call p&
++        &BLAS_CHAR&
++        &heevr('V', 'I', 'U', na, a, 1, 1, sc_desc, 0.0_rk, 0.0_rk, smallest_ev_idx, largest_ev_idx, &
++               comp_eigenval, comp_eigenvec, ev, z, 1, 1, sc_desc, work, lwork, rwork, lrwork, iwork, liwork, info)
++#endif
++    assert(comp_eigenval == nev)
++    assert(comp_eigenvec == nev)
++    deallocate(iwork, work, rwork)
++  end subroutine
++
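++! Editorial note: this template is textually included several times by
++! test_scalapack.F90. Judging from the interface blocks there, with
++! REALCASE/DOUBLE_PRECISION defined the preprocessor expands BLAS_CHAR
++! to `d` and BLAS_CHAR_AND_SY_OR_HE to `dsy`, generating
++! solve_pdsyevd/solve_pdsyevr; COMPLEXCASE/SINGLE_PRECISION likewise
++! yields solve_pcheevd/solve_pcheevr, and so on.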
+diff -ruN elpa-new_release_2021.11.001/examples/shared/test_setup_mpi.F90 elpa-new_release_2021.11.001_ok/examples/shared/test_setup_mpi.F90
+--- elpa-new_release_2021.11.001/examples/shared/test_setup_mpi.F90 1970-01-01 01:00:00.000000000 +0100
++++ elpa-new_release_2021.11.001_ok/examples/shared/test_setup_mpi.F90 2022-01-26 10:10:58.382203000 +0100
+@@ -0,0 +1,115 @@
++! This file is part of ELPA.
++!
++! The ELPA library was originally created by the ELPA consortium,
++! consisting of the following organizations:
++!
++! - Max Planck Computing and Data Facility (MPCDF), formerly known as
++!   Rechenzentrum Garching der Max-Planck-Gesellschaft (RZG),
++! - Bergische Universität Wuppertal, Lehrstuhl für angewandte
++!   Informatik,
++! - Technische Universität München, Lehrstuhl für Informatik mit
++!   Schwerpunkt Wissenschaftliches Rechnen,
++! - Fritz-Haber-Institut, Berlin, Abt. Theorie,
++! - Max-Planck-Institut für Mathematik in den Naturwissenschaften,
++!   Leipzig, Abt. Komplexe Strukturen in Biologie und Kognition,
++!   and
++! - IBM Deutschland GmbH
++!
++!
++! More information can be found here:
++! http://elpa.mpcdf.mpg.de/
++!
++! ELPA is free software: you can redistribute it and/or modify
++! it under the terms of the version 3 of the license of the
++! GNU Lesser General Public License as published by the Free
++! Software Foundation.
++!
++! ELPA is distributed in the hope that it will be useful,
++! but WITHOUT ANY WARRANTY; without even the implied warranty of
++! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
++! GNU Lesser General Public License for more details.
++!
++! You should have received a copy of the GNU Lesser General Public License
++! along with ELPA. If not, see <http://www.gnu.org/licenses/>
++!
++! ELPA reflects a substantial effort on the part of the original
++! ELPA consortium, and we ask you to respect the spirit of the
++! license that we chose: i.e., please contribute any changes you
++! may have back to the original ELPA library distribution, and keep
++! any derivatives of ELPA under the same license that we chose for
++! the original distribution, the GNU Lesser General Public License.
++!
++!
++#include "config-f90.h"
++
++#ifdef HAVE_64BIT_INTEGER_MATH_SUPPORT
++#define TEST_INT_TYPE integer(kind=c_int64_t)
++#define INT_TYPE c_int64_t
++#else
++#define TEST_INT_TYPE integer(kind=c_int32_t)
++#define INT_TYPE c_int32_t
++#endif
++#ifdef HAVE_64BIT_INTEGER_MPI_SUPPORT
++#define TEST_INT_MPI_TYPE integer(kind=c_int64_t)
++#define INT_MPI_TYPE c_int64_t
++#else
++#define TEST_INT_MPI_TYPE integer(kind=c_int32_t)
++#define INT_MPI_TYPE c_int32_t
++#endif
++
++module test_setup_mpi
++
++  contains
++
++    subroutine setup_mpi(myid, nprocs)
++      use test_util
++      use ELPA_utilities
++      use precision_for_tests
++      implicit none
++
++      TEST_INT_MPI_TYPE          :: mpierr
++
++      TEST_INT_TYPE, intent(out) :: myid, nprocs
++      TEST_INT_MPI_TYPE          :: myidMPI, nprocsMPI
++#ifdef WITH_OPENMP_TRADITIONAL
++      TEST_INT_MPI_TYPE          :: required_mpi_thread_level, &
++                                    provided_mpi_thread_level
++#endif
++
++
++#ifdef WITH_MPI
++
++#ifndef WITH_OPENMP_TRADITIONAL
++      call mpi_init(mpierr)
++#else
++      required_mpi_thread_level = MPI_THREAD_MULTIPLE
++
++      call mpi_init_thread(required_mpi_thread_level, &
++                           provided_mpi_thread_level, mpierr)
++
++      if (required_mpi_thread_level .ne. provided_mpi_thread_level) then
++        write(error_unit,*) "MPI ERROR: MPI_THREAD_MULTIPLE is not provided on this system"
++        write(error_unit,*) "           only ", mpi_thread_level_name(provided_mpi_thread_level), " is available"
++        call MPI_FINALIZE(mpierr)
++        call exit(77)
++      endif
++#endif
++      call mpi_comm_rank(mpi_comm_world, myidMPI, mpierr)
++      call mpi_comm_size(mpi_comm_world, nprocsMPI, mpierr)
++
++      myid   = int(myidMPI, kind=BLAS_KIND)
++      nprocs = int(nprocsMPI, kind=BLAS_KIND)
++
++      if (nprocs <= 1) then
++        print *, "The test programs must be run with more than 1 task to ensure that usage with MPI is actually tested"
++        stop 1
++      endif
++#else
++      myid = 0
++      nprocs = 1
++#endif
++
++    end subroutine
++
++
++end module
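++! Editorial sketch: a test program simply does
++!
++!   use test_setup_mpi
++!   TEST_INT_TYPE :: myid, nprocs
++!   call setup_mpi(myid, nprocs)
++!
++! With WITH_OPENMP_TRADITIONAL the subroutine insists on
++! MPI_THREAD_MULTIPLE and otherwise exits with code 77, which the
++! automake test harness treats as SKIP rather than FAIL.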
+diff -ruN elpa-new_release_2021.11.001/examples/shared/tests_variable_definitions.F90 elpa-new_release_2021.11.001_ok/examples/shared/tests_variable_definitions.F90
+--- elpa-new_release_2021.11.001/examples/shared/tests_variable_definitions.F90 1970-01-01 01:00:00.000000000 +0100
++++ elpa-new_release_2021.11.001_ok/examples/shared/tests_variable_definitions.F90 2022-01-26 10:10:58.385037000 +0100
+@@ -0,0 +1,65 @@
++! This file is part of ELPA.
++!
++! The ELPA library was originally created by the ELPA consortium,
++! consisting of the following organizations:
++!
++! - Rechenzentrum Garching der Max-Planck-Gesellschaft (RZG),
++! - Bergische Universität Wuppertal, Lehrstuhl für angewandte
++!   Informatik,
++! - Technische Universität München, Lehrstuhl für Informatik mit
++!   Schwerpunkt Wissenschaftliches Rechnen,
++! - Fritz-Haber-Institut, Berlin, Abt. Theorie,
++! - Max-Planck-Institut für Mathematik in den Naturwissenschaften,
++!   Leipzig, Abt. Komplexe Strukturen in Biologie und Kognition,
++!   and
++! - IBM Deutschland GmbH
++!
++!
++! More information can be found here:
++! https://elpa.mpcdf.mpg.de/
++!
++! ELPA is free software: you can redistribute it and/or modify
++! it under the terms of the version 3 of the license of the
++! GNU Lesser General Public License as published by the Free
++! Software Foundation.
++!
++! ELPA is distributed in the hope that it will be useful,
++! but WITHOUT ANY WARRANTY; without even the implied warranty of
++! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
++! GNU Lesser General Public License for more details.
++!
++! You should have received a copy of the GNU Lesser General Public License
++! along with ELPA. If not, see <http://www.gnu.org/licenses/>
++!
++! ELPA reflects a substantial effort on the part of the original
++! ELPA consortium, and we ask you to respect the spirit of the
++! license that we chose: i.e., please contribute any changes you
++! may have back to the original ELPA library distribution, and keep
++! any derivatives of ELPA under the same license that we chose for
++! the original distribution, the GNU Lesser General Public License.
++!
++! This file was written by A. Marek, MPCDF
++
++#include "config-f90.h"
++module precision_for_tests
++  use iso_c_binding, only : C_FLOAT, C_DOUBLE, C_FLOAT_COMPLEX, C_DOUBLE_COMPLEX, C_INT32_T, C_INT64_T, C_INT
++
++  implicit none
++  integer, parameter :: rk8 = C_DOUBLE
++  integer, parameter :: rk4 = C_FLOAT
++  integer, parameter :: ck8 = C_DOUBLE_COMPLEX
++  integer, parameter :: ck4 = C_FLOAT_COMPLEX
++  integer, parameter :: ik  = C_INT32_T
++  integer, parameter :: lik = C_INT64_T
++
++#ifdef HAVE_64BIT_INTEGER_MATH_SUPPORT
++  integer, parameter :: BLAS_KIND = C_INT64_T
++#else
++  integer, parameter :: BLAS_KIND = C_INT32_T
++#endif
++#ifdef HAVE_64BIT_INTEGER_MPI_SUPPORT
++  integer, parameter :: MPI_KIND = C_INT64_T
++#else
++  integer, parameter :: MPI_KIND = C_INT32_T
++#endif
++end module precision_for_tests
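++! Editorial sketch: these kind parameters keep the test codes
++! independent of how BLAS/ScaLAPACK and MPI were built, e.g.
++!
++!   use precision_for_tests
++!   integer(kind=BLAS_KIND) :: na_blas  ! 32- or 64-bit BLAS integers
++!   integer(kind=MPI_KIND)  :: ierr     ! matches the MPI integer width
++!   real(kind=rk8)          :: tol      ! C double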
+diff -ruN elpa-new_release_2021.11.001/examples/shared/test_util.F90 elpa-new_release_2021.11.001_ok/examples/shared/test_util.F90
+--- elpa-new_release_2021.11.001/examples/shared/test_util.F90 1970-01-01 01:00:00.000000000 +0100
++++ elpa-new_release_2021.11.001_ok/examples/shared/test_util.F90 2022-01-26 10:10:58.383252000 +0100
+@@ -0,0 +1,156 @@
++! This file is part of ELPA.
++!
++! The ELPA library was originally created by the ELPA consortium,
++! consisting of the following organizations:
++!
++! - Max Planck Computing and Data Facility (MPCDF), formerly known as
++!   Rechenzentrum Garching der Max-Planck-Gesellschaft (RZG),
++! - Bergische Universität Wuppertal, Lehrstuhl für angewandte
++!   Informatik,
++! - Technische Universität München, Lehrstuhl für Informatik mit
++!   Schwerpunkt Wissenschaftliches Rechnen,
++! - Fritz-Haber-Institut, Berlin, Abt. Theorie,
++! - Max-Planck-Institut für Mathematik in den Naturwissenschaften,
++!   Leipzig, Abt. Komplexe Strukturen in Biologie und Kognition,
++!   and
++! - IBM Deutschland GmbH
++!
++!
++! More information can be found here:
++! http://elpa.mpcdf.mpg.de/
++!
++! ELPA is free software: you can redistribute it and/or modify
++! it under the terms of the version 3 of the license of the
++! GNU Lesser General Public License as published by the Free
++! Software Foundation.
++!
++! ELPA is distributed in the hope that it will be useful,
++! but WITHOUT ANY WARRANTY; without even the implied warranty of
++! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
++! GNU Lesser General Public License for more details.
++!
++! You should have received a copy of the GNU Lesser General Public License
++! along with ELPA. If not, see <http://www.gnu.org/licenses/>
++!
++! ELPA reflects a substantial effort on the part of the original
++! ELPA consortium, and we ask you to respect the spirit of the
++! license that we chose: i.e., please contribute any changes you
++! may have back to the original ELPA library distribution, and keep
++! any derivatives of ELPA under the same license that we chose for
++! the original distribution, the GNU Lesser General Public License.
++!
++!
++#include "config-f90.h"
++#undef TEST_INT_TYPE
++#undef INT_TYPE
++#undef TEST_INT_MPI_TYPE
++#undef INT_MPI_TYPE
++
++#ifdef HAVE_64BIT_INTEGER_MATH_SUPPORT
++#define TEST_INT_TYPE integer(kind=c_int64_t)
++#define INT_TYPE c_int64_t
++#else
++#define TEST_INT_TYPE integer(kind=c_int32_t)
++#define INT_TYPE c_int32_t
++#endif
++#ifdef HAVE_64BIT_INTEGER_MPI_SUPPORT
++#define TEST_INT_MPI_TYPE integer(kind=c_int64_t)
++#define INT_MPI_TYPE c_int64_t
++#else
++#define TEST_INT_MPI_TYPE integer(kind=c_int32_t)
++#define INT_MPI_TYPE c_int32_t
++#endif
++
++module test_util
++  use iso_c_binding
++  use precision_for_tests
++#ifdef WITH_MPI
++#ifdef HAVE_MPI_MODULE
++  use mpi
++  implicit none
++#else
++  implicit none
++  include 'mpif.h'
++#endif
++#else
++  TEST_INT_MPI_TYPE, parameter :: mpi_comm_world = -1
++#endif
++
++  contains
++!>
++!> If ELPA was built with OpenMP support, this function translates
++!> the detected level of MPI thread safety from the internal number
++!> used by the MPI library into a human-readable value
++!>
++!> \param level thread-safety level of the MPI library
++!> \return str  human-readable name of the thread-safety level
++  pure function mpi_thread_level_name(level) result(str)
++    use, intrinsic :: iso_c_binding
++    implicit none
++    integer(kind=c_int), intent(in) :: level
++    character(len=21)               :: str
++#ifdef WITH_MPI
++    select case(level)
++      case (MPI_THREAD_SINGLE)
++        str = "MPI_THREAD_SINGLE"
++      case (MPI_THREAD_FUNNELED)
++        str = "MPI_THREAD_FUNNELED"
++      case (MPI_THREAD_SERIALIZED)
++        str = "MPI_THREAD_SERIALIZED"
++      case (MPI_THREAD_MULTIPLE)
++        str = "MPI_THREAD_MULTIPLE"
++      case default
++        write(str,'(i0,1x,a)') level, "(Unknown level)"
++    end select
++#endif
++  end function
++
++  function seconds() result(s)
++    integer :: ticks, tick_rate
++    real(kind=c_double) :: s
++
++    call system_clock(count=ticks, count_rate=tick_rate)
++    s = real(ticks, kind=c_double) / tick_rate
++  end function
++
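++  ! Editorial sketch: seconds() wraps system_clock for simple
++  ! wall-clock timing, e.g.
++  !
++  !   t0 = seconds()
++  !   call do_expensive_work()   ! hypothetical work section
++  !   print *, 'elapsed [s]:', seconds() - t0
++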
++  subroutine x_a(condition, condition_string, file, line)
++#ifdef HAVE_ISO_FORTRAN_ENV
++    use iso_fortran_env, only : error_unit
++#endif
++    implicit none
++#ifndef HAVE_ISO_FORTRAN_ENV
++    integer, parameter :: error_unit = 0
++#endif
++    logical, intent(in) :: condition
++    character(len=*), intent(in) :: condition_string
++    character(len=*), intent(in) :: file
++    integer, intent(in) :: line
++
++    if (.not. condition) then
++      write(error_unit,'(a,i0)') "Assertion `" // condition_string // "` failed at " // file // ":", line
++      stop 1
++    end if
++  end subroutine
++
++  subroutine x_ao(error_code, error_code_string, file, line)
++    use elpa
++#ifdef HAVE_ISO_FORTRAN_ENV
++    use iso_fortran_env, only : error_unit
++#endif
++    implicit none
++#ifndef HAVE_ISO_FORTRAN_ENV
++    integer, parameter :: error_unit = 0
++#endif
++    integer, intent(in) :: error_code
++    character(len=*), intent(in) :: error_code_string
++    character(len=*), intent(in) :: file
++    integer, intent(in) :: line
++
++    if (error_code /= ELPA_OK) then
++      write(error_unit,'(a,i0)') "Assertion failed: `" // error_code_string // &
++                                 " is " // elpa_strerr(error_code) // "` at " // file // ":", line
++      stop 1
++    end if
++  end subroutine
++end module
++
+diff -ruN elpa-new_release_2021.11.001/examples/test_real_e1.F90 elpa-new_release_2021.11.001_ok/examples/test_real_e1.F90
+--- elpa-new_release_2021.11.001/examples/test_real_e1.F90 1970-01-01 01:00:00.000000000 +0100
++++ elpa-new_release_2021.11.001_ok/examples/test_real_e1.F90 2022-01-28 16:43:29.688434545 +0100
+@@ -0,0 +1,255 @@
++! This file is part of ELPA.
++!
++! The ELPA library was originally created by the ELPA consortium,
++! consisting of the following organizations:
++!
++! - Max Planck Computing and Data Facility (MPCDF), formerly known as
++!   Rechenzentrum Garching der Max-Planck-Gesellschaft (RZG),
++! - Bergische Universität Wuppertal, Lehrstuhl für angewandte
++!   Informatik,
++! - Technische Universität München, Lehrstuhl für Informatik mit
++!   Schwerpunkt Wissenschaftliches Rechnen,
++! - Fritz-Haber-Institut, Berlin, Abt. Theorie,
++! - Max-Planck-Institut für Mathematik in den Naturwissenschaften,
++!   Leipzig, Abt. Komplexe Strukturen in Biologie und Kognition,
++!   and
++! - IBM Deutschland GmbH
++!
++!
++! More information can be found here:
++! http://elpa.mpcdf.mpg.de/
++!
++! ELPA is free software: you can redistribute it and/or modify
++! it under the terms of the version 3 of the license of the
++! GNU Lesser General Public License as published by the Free
++! Software Foundation.
++!
++! ELPA is distributed in the hope that it will be useful,
++! but WITHOUT ANY WARRANTY; without even the implied warranty of
++! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
++! GNU Lesser General Public License for more details.
++!
++! You should have received a copy of the GNU Lesser General Public License
++! along with ELPA. If not, see <http://www.gnu.org/licenses/>
++!
++! ELPA reflects a substantial effort on the part of the original
++! ELPA consortium, and we ask you to respect the spirit of the
++! license that we chose: i.e., please contribute any changes you
++! may have back to the original ELPA library distribution, and keep
++! any derivatives of ELPA under the same license that we chose for
++! the original distribution, the GNU Lesser General Public License.
++!
++!
++!>
++!> Fortran test program demonstrating the use of
++!> the ELPA 1 real case library.
++!> If "HAVE_REDIRECT" was defined at build time
++!> the stdout and stderr output of each MPI task
++!> can be redirected to files if the environment
++!> variable "REDIRECT_ELPA_TEST_OUTPUT" is set
++!> to "true".
++!>
++!> By calling the executable [arg1] [arg2] [arg3] [arg4]
++!> one can define the matrix size (arg1), the number of
++!> eigenvectors to compute (arg2), and the blocking (arg3).
++!> If these values are not set, the default values (4000, 1500, 16)
++!> are chosen.
++!> If these values are set, the 4th argument can be
++!> "output", which specifies that the EVs are written to
++!> an ASCII file.
++!>
++program test_real_example
++
++!-------------------------------------------------------------------------------
++! Standard eigenvalue problem - REAL version
++!
++! This program demonstrates the use of the ELPA module
++! together with standard scalapack routines
++!
++! Copyright of the original code rests with the authors inside the ELPA
++! consortium. The copyright of any additional modifications shall rest
++! with their original authors, but shall adhere to the licensing terms
++! distributed along with the original code in the file "COPYING".
++!
++!-------------------------------------------------------------------------------
++
++   use iso_c_binding
++
++   use elpa
++#ifdef WITH_OPENMP_TRADITIONAL
++   use omp_lib
++#endif
++
++
++#ifdef HAVE_MPI_MODULE
++   use mpi
++   implicit none
++#else
++   implicit none
++   include 'mpif.h'
++#endif
++
++   !-------------------------------------------------------------------------------
++   ! Please set system size parameters below!
++   ! na:   System size
++   ! nev:  Number of eigenvectors to be calculated
++   ! nblk: Blocking factor in block cyclic distribution
++   !-------------------------------------------------------------------------------
++
++   integer :: nblk
++   integer :: na, nev
++
++   integer :: np_rows, np_cols, na_rows, na_cols
++
++   integer :: myid, nprocs, my_prow, my_pcol, mpi_comm_rows, mpi_comm_cols
++   integer :: i, mpierr, my_blacs_ctxt, sc_desc(9), info, nprow, npcol
++
++   integer, external :: numroc
++
++   real(kind=c_double), allocatable :: a(:,:), z(:,:), ev(:)
++
++   integer :: iseed(4096) ! Random seed, size should be sufficient for every generator
++
++   integer :: STATUS
++   integer :: success
++   character(len=8) :: task_suffix
++   integer :: j
++
++   integer, parameter :: error_units = 0
++
++#ifdef WITH_OPENMP_TRADITIONAL
++   integer n_threads
++#endif
++   class(elpa_t), pointer :: e
++   !-------------------------------------------------------------------------------
++
++
++   ! default parameters
++   na = 1000
++   nev = 500
++   nblk = 16
++
++   call mpi_init_thread(MPI_THREAD_SERIALIZED,info,mpierr)
++   call mpi_comm_rank(mpi_comm_world,myid,mpierr)
++   call mpi_comm_size(mpi_comm_world,nprocs,mpierr)
++
++   do np_cols = NINT(SQRT(REAL(nprocs))),2,-1
++      if(mod(nprocs,np_cols) == 0 ) exit
++   enddo
++   ! at the end of the above loop, nprocs is always divisible by np_cols
++
++   np_rows = nprocs/np_cols
++
++   ! initialise BLACS
++   my_blacs_ctxt = mpi_comm_world
++   call BLACS_Gridinit(my_blacs_ctxt, 'C', np_rows, np_cols)
++   call BLACS_Gridinfo(my_blacs_ctxt, nprow, npcol, my_prow, my_pcol)
++
++   if (myid==0) then
++     print '(a)','| Past BLACS_Gridinfo.'
++   end if
++   ! determine the necessary size of the distributed matrices,
++   ! we use the scalapack tools routine NUMROC
++
++#ifdef WITH_OPENMP_TRADITIONAL
++   n_threads=omp_get_max_threads()
++#endif
++
++
++   na_rows = numroc(na, nblk, my_prow, 0, np_rows)
++   na_cols = numroc(na, nblk, my_pcol, 0, np_cols)
++
++
++   ! set up the scalapack descriptor for the checks below
++   ! For ELPA the following restrictions hold:
++   ! - block sizes in both directions must be identical (args 4 a. 5)
++   ! - first row and column of the distributed matrix must be on
++   !   row/col 0/0 (arg 6 and 7)
++
++   call descinit(sc_desc, na, na, nblk, nblk, 0, 0, my_blacs_ctxt, na_rows, info)
++
++   if (info .ne. 0) then
++      write(error_units,*) 'Error in BLACS descinit! info=',info
++      write(error_units,*) 'Most likely this happened because you want to use'
++      write(error_units,*) 'more MPI tasks than are possible for your'
++      write(error_units,*) 'problem size (matrix size and blocksize)!'
++      write(error_units,*) 'The BLACS grid cannot be set up properly'
++      write(error_units,*) 'Try reducing the number of MPI tasks...'
++      call MPI_ABORT(mpi_comm_world, 1, mpierr)
++   endif
++
++   if (myid==0) then
++     print '(a)','| Past scalapack descriptor setup.'
++   end if
++
++   allocate(a (na_rows,na_cols))
++   allocate(z (na_rows,na_cols))
++
++   allocate(ev(na))
++
++   ! we want different random numbers on every process
++   ! (otherwise A might get rank deficient):
++
++   iseed(:) = myid
++   call RANDOM_SEED(put=iseed)
++   call RANDOM_NUMBER(z)
++
++   a(:,:) = z(:,:)
++
++   if (myid == 0) then
++     print '(a)','| Random matrix block has been set up. (only processor 0 confirms this step)'
++   endif
++   call pdtran(na, na, 1.d0, z, 1, 1, sc_desc, 1.d0, a, 1, 1, sc_desc) ! A = A + Z**T
++
++   !-------------------------------------------------------------------------------
++
++   if (elpa_init(20171201) /= elpa_ok) then
++     print *, "ELPA API version not supported"
++     stop
++   endif
++   e => elpa_allocate()
++
++   ! set parameters describing the matrix and its MPI distribution
++   call e%set("na", na, success)
++   call e%set("nev", nev, success)
++   call e%set("local_nrows", na_rows, success)
++   call e%set("local_ncols", na_cols, success)
++   call e%set("nblk", nblk, success)
++   call e%set("mpi_comm_parent", mpi_comm_world, success)
++   call e%set("process_row", my_prow, success)
++   call e%set("process_col", my_pcol, success)
++
++#ifdef CUDA
++   call e%set("nvidia-gpu", 1, success)
++#endif
++#ifdef WITH_OPENMP_TRADITIONAL
++   call e%set("omp_threads", n_threads, success)
++#endif
++   success = e%setup()
++
++   call e%set("solver", elpa_solver_1stage, success)
++
++
++   ! Calculate eigenvalues/eigenvectors
++
++   if (myid==0) then
++     print '(a)','| Entering one-step ELPA solver ... '
++     print *
++   end if
++
++   call mpi_barrier(mpi_comm_world, mpierr) ! for correct timings only
++   call e%eigenvectors(a, ev, z, success)
++
++   if (myid==0) then
++     print '(a)','| One-step ELPA solver complete.'
++     print *
++   end if
++
++   call elpa_deallocate(e)
++   call elpa_uninit()
++
++   call blacs_gridexit(my_blacs_ctxt)
++   call mpi_finalize(mpierr)
++
++end
++
+diff -ruN elpa-new_release_2021.11.001/examples/test_real_e2.F90 elpa-new_release_2021.11.001_ok/examples/test_real_e2.F90
+--- elpa-new_release_2021.11.001/examples/test_real_e2.F90 1970-01-01 01:00:00.000000000 +0100
++++ elpa-new_release_2021.11.001_ok/examples/test_real_e2.F90 2022-02-01 09:28:16.102146696 +0100
+@@ -0,0 +1,262 @@
++! This file is part of ELPA.
++!
++! The ELPA library was originally created by the ELPA consortium,
++! consisting of the following organizations:
++!
++! - Max Planck Computing and Data Facility (MPCDF), formerly known as
++!   Rechenzentrum Garching der Max-Planck-Gesellschaft (RZG),
++! - Bergische Universität Wuppertal, Lehrstuhl für angewandte
++!   Informatik,
++! - Technische Universität München, Lehrstuhl für Informatik mit
++!   Schwerpunkt Wissenschaftliches Rechnen,
++! - Fritz-Haber-Institut, Berlin, Abt. Theorie,
++! - Max-Planck-Institut für Mathematik in den Naturwissenschaften,
++!   Leipzig, Abt. Komplexe Strukturen in Biologie und Kognition,
++!   and
++! - IBM Deutschland GmbH
++!
++!
++! More information can be found here:
++! http://elpa.mpcdf.mpg.de/
++!
++! ELPA is free software: you can redistribute it and/or modify
++! it under the terms of the version 3 of the license of the
++! GNU Lesser General Public License as published by the Free
++! Software Foundation.
++!
++! ELPA is distributed in the hope that it will be useful,
++! but WITHOUT ANY WARRANTY; without even the implied warranty of
++! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
++! GNU Lesser General Public License for more details.
++!
++! You should have received a copy of the GNU Lesser General Public License
++! along with ELPA. If not, see <http://www.gnu.org/licenses/>
++!
++! ELPA reflects a substantial effort on the part of the original
++! ELPA consortium, and we ask you to respect the spirit of the
++! license that we chose: i.e., please contribute any changes you
++! may have back to the original ELPA library distribution, and keep
++! any derivatives of ELPA under the same license that we chose for
++! the original distribution, the GNU Lesser General Public License.
++!
++!
++!>
++!> Fortran test program demonstrating the use of
++!> the ELPA 2 real case library.
++!> If "HAVE_REDIRECT" was defined at build time
++!> the stdout and stderr output of each MPI task
++!> can be redirected to files if the environment
++!> variable "REDIRECT_ELPA_TEST_OUTPUT" is set
++!> to "true".
++!>
++!> By calling the executable [arg1] [arg2] [arg3] [arg4]
++!> one can define the matrix size (arg1), the number of
++!> eigenvectors to compute (arg2), and the blocking (arg3).
++!> If these values are not set, the default values (4000, 1500, 16)
++!> are chosen.
++!> If these values are set, the 4th argument can be
++!> "output", which specifies that the EVs are written to
++!> an ASCII file.
++!>
++program test_real_example
++
++!-------------------------------------------------------------------------------
++! Standard eigenvalue problem - REAL version
++!
++! This program demonstrates the use of the ELPA module
++! together with standard scalapack routines
++!
++! Copyright of the original code rests with the authors inside the ELPA
++! consortium. The copyright of any additional modifications shall rest
++! with their original authors, but shall adhere to the licensing terms
++! distributed along with the original code in the file "COPYING".
++!
++!-------------------------------------------------------------------------------
++
++   use iso_c_binding
++
++   use elpa
++
++#ifdef HAVE_MPI_MODULE
++   use mpi
++   implicit none
++#else
++   implicit none
++   include 'mpif.h'
++#endif
++
++   !-------------------------------------------------------------------------------
++   ! Please set system size parameters below!
++   ! na:   System size
++   ! nev:  Number of eigenvectors to be calculated
++   ! nblk: Blocking factor in block cyclic distribution
++   !-------------------------------------------------------------------------------
++
++   integer :: nblk
++   integer :: na, nev
++
++   integer :: np_rows, np_cols, na_rows, na_cols
++
++   integer :: myid, nprocs, my_prow, my_pcol, mpi_comm_rows, mpi_comm_cols
++   integer :: i, mpierr, my_blacs_ctxt, sc_desc(9), info, nprow, npcol
++
++   integer, external :: numroc
++
++   real(kind=c_double), allocatable :: a(:,:), z(:,:), ev(:)
++
++   integer :: iseed(4096) ! Random seed, size should be sufficient for every generator
++
++   integer :: STATUS
++   integer :: success
++   character(len=8) :: task_suffix
++   integer :: j
++
++   integer, parameter :: error_units = 0
++
++   class(elpa_t), pointer :: e
++   !-------------------------------------------------------------------------------
++
++
++   ! default parameters
++   na = 1000
++   nev = 500
++   nblk = 16
++
++   call mpi_init_thread(MPI_THREAD_SERIALIZED,info,mpierr)
++   call mpi_comm_rank(mpi_comm_world,myid,mpierr)
++   call mpi_comm_size(mpi_comm_world,nprocs,mpierr)
++
++   do np_cols = NINT(SQRT(REAL(nprocs))),2,-1
++      if(mod(nprocs,np_cols) == 0 ) exit
++   enddo
++   ! at the end of the above loop, nprocs is always divisible by np_cols
++
++   np_rows = nprocs/np_cols
++
++   ! initialise BLACS
++   my_blacs_ctxt = mpi_comm_world
++   call BLACS_Gridinit(my_blacs_ctxt, 'C', np_rows, np_cols)
++   call BLACS_Gridinfo(my_blacs_ctxt, nprow, npcol, my_prow, my_pcol)
++
++   if (myid==0) then
++     print '(a)','| Past BLACS_Gridinfo.'
++   end if
++   ! determine the necessary size of the distributed matrices,
++   ! we use the scalapack tools routine NUMROC
++
++   na_rows = numroc(na, nblk, my_prow, 0, np_rows)
++   na_cols = numroc(na, nblk, my_pcol, 0, np_cols)
++
++
++   ! set up the scalapack descriptor for the checks below
++   ! For ELPA the following restrictions hold:
++   ! - block sizes in both directions must be identical (args 4 a. 5)
++   ! - first row and column of the distributed matrix must be on
++   !   row/col 0/0 (arg 6 and 7)
++
++   call descinit(sc_desc, na, na, nblk, nblk, 0, 0, my_blacs_ctxt, na_rows, info)
++
++   if (info .ne. 0) then
++      write(error_units,*) 'Error in BLACS descinit! info=',info
++      write(error_units,*) 'Most likely this happened because you want to use'
++      write(error_units,*) 'more MPI tasks than are possible for your'
++      write(error_units,*) 'problem size (matrix size and blocksize)!'
++      write(error_units,*) 'The BLACS grid cannot be set up properly'
++      write(error_units,*) 'Try reducing the number of MPI tasks...'
++      call MPI_ABORT(mpi_comm_world, 1, mpierr)
++   endif
++
++   if (myid==0) then
++     print '(a)','| Past scalapack descriptor setup.'
++   end if
++
++   allocate(a (na_rows,na_cols))
++   allocate(z (na_rows,na_cols))
++
++   allocate(ev(na))
++
++   ! we want different random numbers on every process
++   ! (otherwise A might get rank deficient):
++
++   iseed(:) = myid
++   call RANDOM_SEED(put=iseed)
++   call RANDOM_NUMBER(z)
++
++   a(:,:) = z(:,:)
++
++   if (myid == 0) then
++     print '(a)','| Random matrix block has been set up. (only processor 0 confirms this step)'
++   endif
++   call pdtran(na, na, 1.d0, z, 1, 1, sc_desc, 1.d0, a, 1, 1, sc_desc) ! A = A + Z**T
++
++   !-------------------------------------------------------------------------------
++
++   if (elpa_init(20171201) /= elpa_ok) then
++     print *, "ELPA API version not supported"
++     stop
++   endif
++   e => elpa_allocate()
++
++   ! set parameters describing the matrix and its MPI distribution
++   call e%set("na", na, success)
++   call e%set("nev", nev, success)
++   call e%set("local_nrows", na_rows, success)
++   call e%set("local_ncols", na_cols, success)
++   call e%set("nblk", nblk, success)
++   call e%set("mpi_comm_parent", mpi_comm_world, success)
++   call e%set("process_row", my_prow, success)
++   call e%set("process_col", my_pcol, success)
++#ifdef CUDA
++   call e%set("nvidia-gpu", 1, success)
++#endif
++
++   success = e%setup()
++
++#ifdef CUDAKERNEL
++   call e%set("real_kernel", ELPA_2STAGE_REAL_NVIDIA_GPU, success)
++#endif
++#ifdef AVX512
++   call e%set("real_kernel", ELPA_2STAGE_REAL_AVX512_BLOCK2, success)
++#endif
++#ifdef AVX2_B6
++   call e%set("real_kernel", ELPA_2STAGE_REAL_AVX2_BLOCK6, success)
++#endif
++#ifdef AVX2_B4
++   call e%set("real_kernel", ELPA_2STAGE_REAL_AVX2_BLOCK4, success)
++#endif
++#ifdef AVX2_B2
++   call e%set("real_kernel", ELPA_2STAGE_REAL_AVX2_BLOCK2, success)
++#endif
++#ifdef GENERIC
++   call e%set("real_kernel", ELPA_2STAGE_REAL_GENERIC, success)
++#endif
++#ifdef GENERIC_SIMPLE
++   call e%set("real_kernel", ELPA_2STAGE_REAL_GENERIC_SIMPLE, success)
++#endif
++
++   call e%set("solver", elpa_solver_2stage, success)
++
++
++   ! Calculate eigenvalues/eigenvectors
++
++   if (myid==0) then
++     print '(a)','| Entering two-step ELPA solver ... '
++     print *
++   end if
++
++   call mpi_barrier(mpi_comm_world, mpierr) ! for correct timings only
++   call e%eigenvectors(a, ev, z, success)
++
++   if (myid==0) then
++     print '(a)','| Two-step ELPA solver complete.'
++     print *
++   end if
++
++   call elpa_deallocate(e)
++   call elpa_uninit()
++
++   call blacs_gridexit(my_blacs_ctxt)
++   call mpi_finalize(mpierr)
++
++end
++
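++! Editorial note: the #ifdef blocks above pin the 2-stage kernel at
++! compile time. The same choice can also be made at run time, e.g.
++!
++!   call e%set("real_kernel", ELPA_2STAGE_REAL_GENERIC, success)
++!
++! after e%setup(), or (assuming ELPA's ELPA_DEFAULT_* environment
++! mechanism) via
++!   export ELPA_DEFAULT_real_kernel=ELPA_2STAGE_REAL_GENERIC
++! before starting the MPI job.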