Select Git revision
_Plot_profiles.py
Forked from
JuPedSim / JPSreport
Source project has a limited visibility.
tsqr_elemental.cpp 5.33 KiB
#include <El.hpp>
#include <stdio.h>
#include <iostream>
#include <vector>
#include <stdlib.h>
#include <cmath>
#include <time.h>
#include <boost/program_options.hpp>
#include <omp.h>
using namespace El;
std::tuple<double, double> validation(DistMatrix<double,VC, STAR>& Q, DistMatrix<double,STAR,STAR>& R, DistMatrix<double,VC, STAR>& A){
const Grid& g = A.Grid();
const Int m = A.Height();
const Int n = A.Width();
const Int maxDim = Max(m,n);
const double eps = 5*10e-16;
const double oneNormA = OneNorm( A );
// Form I - Q^H Q
DistMatrix<double> Z(g);
Identity( Z, n, n );
Herk( UPPER, ADJOINT, -1.0, Q, 1.0, Z );
const double infOrthogError = HermitianInfinityNorm( UPPER, Z );
const double relOrthogError = infOrthogError / (eps*maxDim);
// Form A - Q R
LocalGemm( NORMAL, NORMAL, -1.0, Q, R, 1.0, A );
const double infError = InfinityNorm( A );
const double relError = infError / (eps*maxDim*oneNormA);
return std::make_tuple(relOrthogError, relError);
}
DistMatrix<double,STAR,STAR> ConstructR(AbstractDistMatrix<double>& A, qr::TreeData<double>& treeData )
{
if( A.RowDist() != STAR ){
LogicError("Invalid row distribution for TSQR");
}
const Grid& g = A.Grid();
DistMatrix<double,CIRC,CIRC> RRoot(g);
if( A.ColRank() == 0 )
{
const Int n = A.Width();
auto R = qr::ts::RootQR(A,treeData);
auto RTop = R( IR(0,n), IR(0,n) );
RRoot.CopyFromRoot( RTop );
MakeTrapezoidal( UPPER, RRoot );
}
else
{
RRoot.CopyFromNonRoot();
}
DistMatrix<double,STAR,STAR> R(g);
R = RRoot;
return R;
}
int main( int argc, char* argv[] ){
Initialize( argc, argv );
mpi::Comm comm = mpi::COMM_WORLD;
using namespace boost::program_options;
options_description desc_commandline;
desc_commandline.add_options()
("m", value<int>()->default_value(1024), "Size m.")
("n", value<int>()->default_value(32), "Size n.")
("nb", value<int>()->default_value(32), "Size n.")
("rpoc", value<int>()->default_value(1), "proc nb in row dimension.")
("debug", value<std::string>()->default_value("no"), "(debug) => print matrices")
("explicit-Q", bool_switch()->default_value(false), "Explicit generation of Q")
("validate", bool_switch()->default_value(false), "validation of the implementation")
("omp:threads", value<int>()->default_value(1), "OpenMP thread number.")
("repetitions,r", value<int>()->default_value(1), "Number of repetitions.");
variables_map vm;
store(parse_command_line(argc, argv, desc_commandline), vm);
int m = vm["m"].as<int>();
int n = vm["n"].as<int>();
int nb = vm["nb"].as<int>();
int rpoc_init = vm["rpoc"].as<int>();
int rep = vm["repetitions"].as<int>();
bool explicitQ=vm["explicit-Q"].as<bool>();
bool validate = vm["validate"].as<bool>();
std::string out = vm["debug"].as<std::string>();
bool debug = false;
if(out == "yes"){
debug = true;
}
int num_threads = vm["omp:threads"].as<int>();
#if defined(USE_MULTITHREADS)
omp_set_num_threads(num_threads);
#endif
int size;
MPI_Comm_size(MPI_COMM_WORLD, &size);
const Grid g( comm, size, COLUMN_MAJOR);
SetBlocksize( nb );
DistMatrix<double,VC,STAR> A(g), AFact(g);
DistMatrix<double,STAR,STAR> R(g);
Zeros(A, m, n);
srand (1);
for(int i = 0; i < m * n; i++){
// double val = (double)rand()/RAND_MAX*10.-5.;
double val = (double)rand()/RAND_MAX*5.-10.;
int xind = i % m;
int yind = i / m ;
A.Set(xind, yind, val);
}
if(debug)
Print( A, "A" );
AFact = A;
Timer timer;
mpi::Barrier( g.Comm() );
timer.Start();
if(explicitQ){
qr::ExplicitTS( AFact, R );
}else{
qr::TreeData<double> treeData = qr::TS( AFact );
Copy( ConstructR( AFact, treeData ), R );
}
mpi::Barrier( g.Comm() );
const double time_diff = timer.Stop();
if(explicitQ && validate){
double relOrthogError, relError;
auto valid = validation(AFact, R, A);
relOrthogError = std::get<0>(valid);
relError = std::get<1>(valid);
if (g.Rank() == 0) {
std::cout << "----------------------" << std::endl;
std::cout << "||Q^H Q - I||_oo / (eps Max(m,n)) =: " << relOrthogError << ". ";
if(relOrthogError > 10.0){
std::cout << "Unacceptably large relative orthogonality error" ;
}
std::cout << std::endl;
std::cout << "||A - QR||_oo / (eps Max(m,n) ||A||_1) =: "<< relError << ". ";
if(relError > 10.0){
std::cout << "Unacceptably large relative error" ;
}
std::cout << std::endl;
std::cout << "----------------------" << std::endl;
}
}
if(explicitQ){
if(g.Rank() == 0) std::cout << "TSExplicit Elemental," << m << "," << n << "," << nb << "," << size << "," << num_threads << "," << time_diff << std::endl;
}else{
if(g.Rank() == 0) std::cout << "TS Elemental," << m << "," << n << "," << nb << "," << size << "," << num_threads << "," << time_diff << std::endl;
}
if(debug){
if(explicitQ){
Print( AFact, "Q" );
Print( R, "R" );
}else{
Print( R, "R" );
}
}
return 0;
}