diff --git a/.gitignore b/.gitignore index 4acf2fbe3dd93867d3e3bfec50284edce3387799..61f1623981fc2902aa1d61bd39039a1a608b22a1 100644 --- a/.gitignore +++ b/.gitignore @@ -34,6 +34,7 @@ ./build build build2 +build3 CMakeLists.txt.user CMakeCache.txt CMakeFiles diff --git a/CMakeLists.txt b/CMakeLists.txt index a2401676da8a114b7785e63ecc7a6f37c057751b..350543261cb686a3eac2d4ddc89663353ef477e8 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -158,6 +158,7 @@ ADD_SUBDIRECTORY(HEEVD) ADD_SUBDIRECTORY(updateQR) ADD_SUBDIRECTORY(TSQR) +ADD_SUBDIRECTORY(GEMM) include(Dart) include(CPack) diff --git a/GEMM/CMakeLists.txt b/GEMM/CMakeLists.txt new file mode 100644 index 0000000000000000000000000000000000000000..18f6c1da379bc47bcd1f0c7a395765b81405fcfb --- /dev/null +++ b/GEMM/CMakeLists.txt @@ -0,0 +1,6 @@ +if(MKL_MT) + add_executable(gemm_mkl.exe gemm_mkl.cpp) + target_link_libraries(gemm_mkl.exe PRIVATE mkl_common_mt ${OPENMP_FLAGS}) +endif() + + diff --git a/GEMM/gemm_mkl.cpp b/GEMM/gemm_mkl.cpp new file mode 100644 index 0000000000000000000000000000000000000000..ed97f6f54fd0c1d3ce957facf058acc46959a8ec --- /dev/null +++ b/GEMM/gemm_mkl.cpp @@ -0,0 +1,81 @@ +#include <stdio.h> +#include <iostream> +#include <vector> +#include <cmath> +#include <time.h> +#include <iomanip> +#include <chrono> +#include <cstdlib> +#include <boost/program_options.hpp> +#include <boost/foreach.hpp> +#include <boost/tokenizer.hpp> + +#include <mkl.h> + +#include <omp.h> + +int main(int argc, char** argv) { + + using namespace boost::program_options; + + options_description desc_cmdline; + + desc_cmdline.add_options() + ("size,s", value<int>()->default_value(-1), "Matrix size (if > 0, overrides m, n, k).") + ("m", value<int>()->default_value(1024), "Size m.") + ("n", value<int>()->default_value(1024), "Size n.") + ("k", value<int>()->default_value(512), "Size k.") + ("omp:threads", value<int>()->default_value(1), "OpenMP thread number.") + ("repeat", value<int>()->default_value(1), "repeat times."); + + variables_map vm; + + store(parse_command_line(argc, argv, desc_cmdline),vm); + + int m = vm["m"].as<int>(); + int n = vm["n"].as<int>(); + int k = vm["k"].as<int>(); + int s = vm["size"].as<int>(); + int repeat = vm["repeat"].as<int>(); + + if (s > 0) + m = n = k = s; + + int num_threads = vm["omp:threads"].as<int>(); + omp_set_num_threads(num_threads); + + const double alpha = 1.0; + + const double beta = 1.0; + + std::vector<double> A(m * k); + std::vector<double> B(k * n); + std::vector<double> C(m * n); + + std::generate(A.begin(), A.end(), [ttt = 1] () mutable { return ttt++; }); + std::generate(B.begin(), B.end(), [ttt = 2] () mutable { return ttt++; }); + std::fill(C.begin(), C.end(), (double)0.0); + + double flops = 2.0 * m * n * k / 1e9; + + std::chrono::high_resolution_clock::time_point start, end; + + std::chrono::duration<double> elapsed; + + start = std::chrono::high_resolution_clock::now(); + + for(int i = 0; i < repeat; i++){ + + dgemm("N", "N", &m, &n, &k, &alpha, A.data(), &m, B.data(), &k, &beta, C.data(), &m); + + } + + end = std::chrono::high_resolution_clock::now(); + + elapsed = std::chrono::duration_cast<std::chrono::duration<double>>(end - start); + + std::cout << m << "," << n << "," << k << "," << "GEMM (mkl)," << num_threads << "," << 1 << "," << elapsed.count() << "," << repeat * flops / elapsed.count() << std::endl; + + return 0; +} + diff --git a/updateQR/updateQR.cpp b/updateQR/updateQR.cpp index ad2e8ff0db08fa3d2292a22d929bf20bc4edcf03..f92e4569b2535bd57108490473c0d1b7ca15ec2f 100644 --- a/updateQR/updateQR.cpp +++ b/updateQR/updateQR.cpp @@ -281,40 +281,57 @@ int main(int argc, char** argv) { std::vector<double> tau(nevx); - std::vector<double> Vtmp; + std::vector<double> Vtmp(m * nevx); - std::chrono::high_resolution_clock::time_point t1, t2, t3, t4; + std::chrono::high_resolution_clock::time_point t1, t2; - t1 = std::chrono::high_resolution_clock::now(); + std::chrono::high_resolution_clock::time_point start = std::chrono::high_resolution_clock::now(); - geqrf(m, nevx, V.data(), m, tau.data()); + for(int i = 0; i < num_iter; i++){ + //monitor of filter V<-V + for(int j = m * fixed; j < V.size(); j++){ + V[j] += (double)rand()/RAND_MAX*2.0-1.0; + } - t2 = std::chrono::high_resolution_clock::now(); + if(printtime){ + t1 = std::chrono::high_resolution_clock::now(); + } - std::chrono::duration<double> duration_1 = std::chrono::duration_cast<std::chrono::duration<double>>(t2 - t1); + std::memcpy(V.data(), Vtmp.data(), m * fixed * sizeof(double)); -// - for(int j = 0; j < V.size(); j++){ + mqr((char *)"L", (char *)"T", m, nevx - fixed, fixed, V.data(), m, tau.data(), V.data() + m * fixed, m); - V[j] = j + 2.1; + //QR + geqrf(m - fixed, nevx - fixed, V.data() + fixed * m + fixed, m, tau.data() + fixed); - } + Vtmp = V; - t3 = std::chrono::high_resolution_clock::now(); - geqrf_c(m, nevx, V.data(), m, tau.data()); + if(printtime){ + t2 = std::chrono::high_resolution_clock::now(); - t4 = std::chrono::high_resolution_clock::now(); + std::chrono::duration<double> time = std::chrono::duration_cast<std::chrono::duration<double>>(t2 - t1); - std::chrono::duration<double> duration_2 = std::chrono::duration_cast<std::chrono::duration<double>>(t4 - t3); + std::cout << "updateQR," << i+1 << "," << time.count() << std::endl; - std::cout << "MKL GEQRF = " << duration_1.count() << "s" << ", My GEQRF = " << duration_2.count() << "s." << std::endl; - std::chrono::high_resolution_clock::time_point start = std::chrono::high_resolution_clock::now(); + } + + gqr(m, nevx, nevx, V.data(), m, tau.data()); + + //reduce to active space and compute residual + + //deflation and locking: here "locked" number of eigepair are approximated + std::memcpy(Y.data() + fixed * m, V.data() + fixed * m, m * locked * sizeof(double)); + + fixed += locked; + + } std::chrono::high_resolution_clock::time_point end = std::chrono::high_resolution_clock::now(); - + std::chrono::duration<double> time_span = std::chrono::duration_cast<std::chrono::duration<double>>(end - start); + if(debug == "yes") showMatrix(V, m, nevx, "V2"); @@ -322,7 +339,7 @@ int main(int argc, char** argv) { if(debug == "yes") showMatrix(Y, m, nev, "Y2"); -// std::cout << "UpdateQR," << m << "," << nev << "," << nex << "," << locked << "," << time_span.count() << "," << num_threads << std::endl; + std::cout << "UpdateQR," << m << "," << nev << "," << nex << "," << locked << "," << time_span.count() << "," << num_threads << std::endl; return 0;