diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml
index 4b368ae4041497fe92917f96b7e16d8c057285c3..0fb81608d55c8e2eab791bc36f3d8ebafb7a6e60 100644
--- a/.gitlab-ci.yml
+++ b/.gitlab-ci.yml
@@ -1,15 +1,13 @@
-image: centos-test
-
 stages:
   - init
   - build
 
 before_script:
-  - hostname
-  - whoami
-  - pwd
-  - ls
-  - ls -l ci
+#  - hostname
+#  - whoami
+#  - pwd
+#  - ls
+#  - ls -l ci
   - mkdir -p badges/
 
 badges:
@@ -44,7 +42,7 @@ build-mpi-nofortran:
 
 build-mpi-fortran:
   stage: build
-  tags: 
+  tags:
     - linux
   when: always
  script:
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 68a9d62a2318075197faaf92c9a1a029bb22a741..8c82569f975f4673c34937e7397ed45607ca098f 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -3,6 +3,7 @@ cmake_minimum_required (VERSION 3.1)
 option(CM_ALL_VTK_OUTPUT "VTK output routine" OFF)
 option(CM_ALL_VORONOI "Voronoi-based loadbalancing scheme (requires Voro++)" OFF)
 option(CM_ALL_FORTRAN "VTK output routine" OFF)
+option(CM_ALL_DEBUG "Enable debug information" OFF)
 
 # leading and trailing whitespace should be silently ignored
 # thanks to an old FindMPI script
@@ -29,6 +30,11 @@ if(CM_ALL_VTK_OUTPUT)
     add_compile_options("-DALL_VTK_OUTPUT")
 endif(CM_ALL_VTK_OUTPUT)
 
+if (CM_ALL_DEBUG)
+    message("Using ALL debug information")
+    add_compile_options("-DALL_DEBUG_ENABLED")
+endif(CM_ALL_DEBUG)
+
 if(CM_ALL_VORONOI)
     message(STATUS "compiling voro++ version in contrib/voro++")
     add_subdirectory(contrib/voro++)
diff --git a/example/ALL_test.cpp b/example/ALL_test.cpp
index 6c5923fe7da098a007d7df4f970e2d62469341e5..dfd9876cc54e17d5efd9a9adb3ee5cb0a7c934e4 100644
--- a/example/ALL_test.cpp
+++ b/example/ALL_test.cpp
@@ -62,6 +62,8 @@
 #define MAX_NEIG 1024
 #define ALL_HISTOGRAM_DEFAULT_WIDTH 1.0
 
+#undef ALL_VTK_OUTPUT
+
 #ifdef ALL_VORONOI
 #define ALL_VORONOI_PREP_STEPS 50
 #endif
@@ -125,9 +127,9 @@ void generate_points(std::vector<ALL_Point<double>>& points,
  *
  * format of MPI I/O files:
  *
- *  n_proc int       : offset in point data
- *  n_proc int       : number of points on domain
- *  5*n_point double : point data
+ *  n_proc long                    : offset in point data
+ *  n_proc int                     : number of points on domain
+ *  n_point * ( long + 4*double )  : point data (ID, position, weight)
 ****************************************************************/
 
 void read_points(std::vector<ALL_Point<double>>& points,
@@ -146,9 +148,9 @@
 
     n_points = 0;
 
-    int n_procs;
-    int offset;
-    MPI_Comm_size(comm,&n_procs);
+    int n_ranks;
+    long offset;
+    MPI_Comm_size(comm,&n_ranks);
 
     if (err)
     {
@@ -158,30 +160,40 @@
     // read offset from file
     MPI_File_read_at(file,
-                     (MPI_Offset)(rank*sizeof(int)),
+                     (MPI_Offset)(rank*sizeof(long)),
                      &offset,
                      1,
-                     MPI_INT,
+                     MPI_LONG,
                      MPI_STATUS_IGNORE);
 
     // read number of points from file
     MPI_File_read_at(file,
-                     (MPI_Offset)((n_procs + rank)*sizeof(int)),
+                     (MPI_Offset)(n_ranks * sizeof(long) + rank * sizeof(int)),
                      &n_points,
                      1,
                      MPI_INT,
                      MPI_STATUS_IGNORE);
 
-    double values[5];
+    double values[4];
+    long ID;
+    long block_size = sizeof(long) + 4*sizeof(double);
 
     for (int i = 0; i < n_points; ++i)
     {
         MPI_File_read_at(file,
-                         (MPI_Offset)(5*(offset + i)*sizeof(double)
-                            + 2 * n_procs * sizeof(int)),
+                         (MPI_Offset)((offset + i)*block_size
+                            + n_ranks * ( sizeof(int) + sizeof(long))),
+                         &ID,
+                         1,
+                         MPI_LONG,
+                         MPI_STATUS_IGNORE);
+        MPI_File_read_at(file,
+                         (MPI_Offset)((offset + i)*block_size
+                            + n_ranks *( sizeof(int)+sizeof(long))
+                            + sizeof(long)),
                          values,
-                         5,
+                         4,
                          MPI_DOUBLE,
                          MPI_STATUS_IGNORE);
-        ALL_Point<double> 
p(dimension,&values[1],values[4]); + ALL_Point<double> p(dimension,&values[0],values[3],ID); points.push_back(p); } @@ -316,6 +328,8 @@ int main(int argc, char** argv) sys_size.at(4) = 0; sys_size.at(5) = BOX_SIZE; + std::vector<double> min_size(3,2.0); + ALL_LB_t chosen_method = ALL_LB_t::STAGGERED; bool weighted_points = false; char* filename = NULL; @@ -1005,7 +1019,7 @@ int main(int argc, char** argv) #endif double limit_efficiency = 0.5; // create ALL object - ALL<double,double> lb_obj(sys_dim,vertices,gamma); + ALL<double,double> lb_obj(chosen_method,sys_dim,vertices,gamma); for (int i_loop = 0; i_loop < max_loop; ++i_loop) { MPI_Barrier(cart_comm); @@ -1182,42 +1196,91 @@ int main(int argc, char** argv) } #endif } - +#ifdef ALL_DEBUG_ENABLED MPI_Barrier(cart_comm); if (local_rank == 0) std::cout << "finished computation of work" << std::endl; //lb_obj.set_work((double)n_points); + if( local_rank == 0 ) std::cout << "Setting work..." << std::endl; +#endif lb_obj.set_work(work); +#ifdef ALL_DEBUG_ENABLED + MPI_Barrier(cart_comm); + if( local_rank == 0 ) std::cout << "Setting process grid information..." << std::endl; +#endif lb_obj.set_proc_grid_params(local_coords,global_dim); +#ifdef ALL_DEBUG_ENABLED + MPI_Barrier(cart_comm); + if( local_rank == 0 ) std::cout << "Setting communicator..." << std::endl; +#endif lb_obj.set_communicator(MPI_COMM_WORLD); - lb_obj.setup(chosen_method); +#ifdef ALL_DEBUG_ENABLED + MPI_Barrier(cart_comm); + if( local_rank == 0 ) std::cout << "Setting up method..." << std::endl; +#endif + lb_obj.setup(); +#ifdef ALL_DEBUG_ENABLED + MPI_Barrier(cart_comm); + if( local_rank == 0 ) std::cout << "Setting method data..." << std::endl; +#endif if (chosen_method == ALL_LB_t::HISTOGRAM) - lb_obj.set_method_data(chosen_method,n_bins.data()); + lb_obj.set_method_data(n_bins.data()); - - lb_obj.set_sys_size(chosen_method, sys_size); +#ifdef ALL_DEBUG_ENABLED + MPI_Barrier(cart_comm); + if( local_rank == 0 ) std::cout << "Setting system size..." << std::endl; +#endif + lb_obj.set_sys_size(sys_size); +#ifdef ALL_DEBUG_ENABLED + MPI_Barrier(cart_comm); + if( local_rank == 0 ) std::cout << "Setting domain vertices..." << std::endl; +#endif lb_obj.set_vertices(vertices); +#ifdef ALL_DEBUG_ENABLED + MPI_Barrier(cart_comm); + if( local_rank == 0 ) std::cout << "Setting domain minimum size..." << std::endl; +#endif + lb_obj.set_min_domain_size(min_size); // print out number of particles to check communication! int n_points_global = 0; MPI_Reduce(&n_points,&n_points_global,1,MPI_INT,MPI_SUM,0,MPI_COMM_WORLD); - if (local_rank == 0) - std::cout << "number of particles in step " - << i_loop << ": " << n_points_global << std::endl; - lb_obj.balance(chosen_method); + if( local_rank == 0 ) std::cout << "number of particles in step " + << i_loop << ": " << n_points_global << std::endl; +#ifdef ALL_DEBUG_ENABLED + MPI_Barrier(cart_comm); + if( local_rank == 0 ) std::cout << "Balancing domains..." << std::endl; +#endif + lb_obj.balance(); +#ifdef ALL_DEBUG_ENABLED + MPI_Barrier(cart_comm); + if( local_rank == 0 ) std::cout << "Updating vertices..." << std::endl; +#endif new_vertices = lb_obj.get_result_vertices(); old_vertices = vertices; vertices = new_vertices; if (chosen_method == ALL_LB_t::VORONOI) { +#ifdef ALL_DEBUG_ENABLED + MPI_Barrier(cart_comm); + if( local_rank == 0 ) std::cout << "Updating vertices (Voronoi)..." 
<< std::endl; +#endif vertices.resize(2); vertices.at(1) = vertices.at(0); } +#ifdef ALL_DEBUG_ENABLED + MPI_Barrier(cart_comm); + if( local_rank == 0 ) std::cout << "Updating upper / lower vertices..." << std::endl; +#endif lp = vertices.at(0); up = vertices.at(vertices.size()-1); +#ifdef ALL_DEBUG_ENABLED + MPI_Barrier(cart_comm); + if( local_rank == 0 ) std::cout << "beginning particles transfer..." << std::endl; +#endif switch(chosen_method) { case ALL_LB_t::TENSOR: @@ -1368,8 +1431,8 @@ int main(int argc, char** argv) int offset_neig[2]; std::vector<int> neighbors; int* n_neighbors; - lb_obj.get_neighbors(chosen_method,neighbors); - lb_obj.get_neighbors(chosen_method,&n_neighbors); + lb_obj.get_neighbors(neighbors); + lb_obj.get_neighbors(&n_neighbors); offset_neig[0] = 0; offset_neig[1] = n_neighbors[0]; @@ -1982,11 +2045,11 @@ int main(int argc, char** argv) { #ifdef ALL_VORONOI // get neighbor information - int n_neighbors = lb_obj.get_n_neighbors(chosen_method); + int n_neighbors = lb_obj.get_n_neighbors(); std::vector<double> neighbor_vertices; std::vector<int> neighbors; - lb_obj.get_neighbor_vertices(chosen_method, neighbor_vertices); - lb_obj.get_neighbors(chosen_method, neighbors); + lb_obj.get_neighbor_vertices(neighbor_vertices); + lb_obj.get_neighbors(neighbors); // compute voronoi cells @@ -2277,7 +2340,7 @@ int main(int argc, char** argv) */ if ( (borders.at(2*n) <= old_border.at(0) && - borders.at(2*n+1) > old_border.at(0)) || + borders.at(2*n+1) >= old_border.at(0)) || (borders.at(2*n) <= old_border.at(1) && borders.at(2*n+1) >= old_border.at(1)) || (borders.at(2*n) > old_border.at(0) && @@ -2286,7 +2349,7 @@ int main(int argc, char** argv) send_neig.push_back(n); if ( (old_borders.at(2*n) <= local_borders.at(0) && - old_borders.at(2*n+1) > local_borders.at(0)) || + old_borders.at(2*n+1) >= local_borders.at(0)) || (old_borders.at(2*n) <= local_borders.at(1) && old_borders.at(2*n+1) >= local_borders.at(1)) || (old_borders.at(2*n) > local_borders.at(0) && @@ -2321,6 +2384,7 @@ int main(int argc, char** argv) send_vec.at(n).push_back(p.x(1)); send_vec.at(n).push_back(p.x(2)); send_vec.at(n).push_back(p.get_weight()); + send_vec.at(n).push_back((double)(p.get_id())); } } } @@ -2388,6 +2452,7 @@ int main(int argc, char** argv) recvs++; } + // transfer points from old domains to new domains for (int n = 0; n < send_neig.size(); ++n) { @@ -2416,11 +2481,12 @@ int main(int argc, char** argv) { int idx; MPI_Waitany(recv_neig.size(),rreq.data(),&idx,rsta.data()); - for (int p = 0; p < recv_vec.at(idx).size(); p+=4) + for (int p = 0; p < recv_vec.at(idx).size(); p+=5) { ALL_Point<double> tmp(3, recv_vec.at(idx).data()+p, - recv_vec.at(idx).at(p+3) + recv_vec.at(idx).at(p+3), + (long)(recv_vec.at(idx).at(p+4)) ); points.push_back(tmp); } @@ -2435,6 +2501,7 @@ int main(int argc, char** argv) break; } + if (i_loop <= 100 || i_loop % OUTPUT_INTV == 0) { #ifdef ALL_VORONOI @@ -2467,6 +2534,7 @@ int main(int argc, char** argv) #endif } + // calculate quality parameters if (!weighted_points) { @@ -2483,8 +2551,6 @@ int main(int argc, char** argv) double n_total_points; MPI_Allreduce(&n_local,&n_total_points,1,MPI_DOUBLE,MPI_SUM,cart_comm); - - MPI_Allreduce(&n_local,&n_total,1,MPI_DOUBLE,MPI_SUM,cart_comm); avg_work = n_total/(double)n_ranks; MPI_Allreduce(&n_local,&n_min,1,MPI_DOUBLE,MPI_MIN,cart_comm); @@ -2549,14 +2615,27 @@ int main(int argc, char** argv) else of.open("domain_data_w.dat", std::ios::out | std::ios::app); of << (i_loop+1) << " " << local_rank << " "; - + + if 
(chosen_method == ALL_LB_t::HISTOGRAM) + { + int* n_neighbors; + lb_obj.get_neighbors(&n_neighbors); + int n_neig = 0; + for (int j = 0; j < 3; ++j) + { + n_neig += n_neighbors[j]; + } + of << n_neig << " "; + } + /* for (int j = 0; j < sys_dim; ++j) { of << " " << local_coords[j] << " " << lp.x(j) << " " << up.x(j) << " "; } + */ - of << " " << n_local << " "; + of << n_local << " "; of << std::endl; if (i == n_ranks - 1) of << std::endl; @@ -2642,6 +2721,78 @@ int main(int argc, char** argv) } + // create binary output (e.g. for HimeLB) + // format: + // n_ranks integers (number of particles per domain) + // n_ranks integers (offset in file) + // <n_particles[0]> * (long + 3*double) double values (particle positions) + + + // open file + int err; + MPI_File outfile; + err = MPI_File_open(MPI_COMM_WORLD, + "particles_output.bin", + MPI_MODE_CREATE | MPI_MODE_RDWR, + MPI_INFO_NULL, + &outfile); + + // write out the number of particles on each process + MPI_File_write_at(outfile, + (MPI_Offset)(local_rank*sizeof(int)), + &n_points, + 1, + MPI_INT, + MPI_STATUS_IGNORE); + + // compute the offset for each process + long n_p = (long)n_points; + long offset; + + MPI_Scan(&n_p,&offset,1,MPI_LONG,MPI_SUM,cart_comm); + offset -= n_p; + // size of data for a single block + long block_size = sizeof(long); // + 3 * sizeof(double); + + offset *= block_size; + offset += n_ranks * (sizeof(int) + sizeof(long)); + + + MPI_File_write_at(outfile, + (MPI_Offset)( n_ranks * sizeof(int) + + local_rank * sizeof(long)), + &offset, + 1, + MPI_LONG, + MPI_STATUS_IGNORE); + + + for ( int n = 0; n < n_points; ++n) + { + long id = points.at(n).get_id(); + double loc[3]; + loc[0] = points.at(n).get_coordinate(0); + loc[1] = points.at(n).get_coordinate(1); + loc[2] = points.at(n).get_coordinate(2); + MPI_File_write_at(outfile, + (MPI_Offset)((offset+n*block_size)), + &id, + 1, + MPI_LONG, + MPI_STATUS_IGNORE); + /* + MPI_File_write_at(outfile, + (MPI_Offset)((offset+n*block_size)+sizeof(long)), + loc, + 3, + MPI_DOUBLE, + MPI_STATUS_IGNORE); + */ + } + + MPI_File_close(&outfile); + + /* // output of borders / contents if (n_ranks < 216) @@ -2686,6 +2837,7 @@ int main(int argc, char** argv) std::cout << "min_step: " << min_step << std::endl; std::cout << std::endl; } + MPI_Barrier(cart_comm); MPI_Finalize(); return EXIT_SUCCESS; } diff --git a/example/CMakeLists.txt b/example/CMakeLists.txt index a6acb87ce9d7f70cf060a1fc65e6967daeea70e8..2b52833468b60538c9348e61b952367b85236117 100644 --- a/example/CMakeLists.txt +++ b/example/CMakeLists.txt @@ -26,6 +26,15 @@ install(TARGETS ASCII2MPIBIN LIBRARY DESTINATION lib INCLUDES DESTINATION include) +add_executable(read_binary_output read_output.cpp) + +target_link_libraries(read_binary_output LINK_PUBLIC ALL) + +install(TARGETS read_binary_output + RUNTIME DESTINATION bin + LIBRARY DESTINATION lib + INCLUDES DESTINATION include) + if(CM_ALL_FORTRAN) add_executable(ALL_test_f ALL_test_f.f90) if(CM_ALL_VTK_OUTPUT) diff --git a/example/ascii2mpibin.cpp b/example/ascii2mpibin.cpp index c41ed452da8e52467fbe3284464aa894ee13d324..bd65465f45d97b71d1aa2eef2e9ff1fadb5af9ef 100644 --- a/example/ascii2mpibin.cpp +++ b/example/ascii2mpibin.cpp @@ -35,7 +35,8 @@ int main (int argc, char** argv) char line[256]; - double values[5]; + long blockID; + double values[4]; int counter = 0; double max[3]; @@ -47,9 +48,9 @@ int main (int argc, char** argv) std::string str(line); std::istringstream istr(str); //std::cout << str << std::endl; - istr >> values[0] >> values[1] >> values[2] >> values[3] 
>> values[4]; + istr >> blockID >> values[0] >> values[1] >> values[2] >> values[3]; for (int i = 0; i < 3; ++i) - if (max[i] < values[i+1]) max[i] = values[i+1]; + if (max[i] < values[i]) max[i] = values[i]; } // go back to start of file @@ -72,8 +73,8 @@ int main (int argc, char** argv) int offset = 0; const int n_procs = dim[0] * dim[1] * dim[2]; - std::vector<int> loc_parts(dim[0]*dim[1]*dim[2]); - std::vector<int> offset_domain(dim[0]*dim[1]*dim[2]); + std::vector<long> loc_parts(dim[0]*dim[1]*dim[2]); + std::vector<long> offset_domain(dim[0]*dim[1]*dim[2]); std::vector<int> curr_index(dim[0]*dim[1]*dim[2]); for (int i = 0; i < dim[0] * dim[1] * dim[2]; ++i) @@ -89,12 +90,12 @@ int main (int argc, char** argv) if(infile.eof()) break; std::string str(line); std::istringstream istr(str); - istr >> values[0] >> values[1] >> values[2] >> values[3] >> values[4]; + istr >> blockID >> values[0] >> values[1] >> values[2] >> values[3]; int cell[3]; for (int i = 0; i < 3; ++i) - cell[i] = (int)(values[i+1] / cell_size[i]); + cell[i] = (int)(values[i] / cell_size[i]); loc_parts.at(cell[0]*dim[1]*dim[2]+cell[1]*dim[2]+cell[2])++; } @@ -105,8 +106,9 @@ int main (int argc, char** argv) for (int i = 0; i < dim[0] * dim[1] * dim[2]; ++i) { offset_domain.at(i) -= loc_parts.at(i); - MPI_File_write_at_all(outfile,(MPI_Offset)(i*sizeof(int)),&offset_domain.at(i),1,MPI_INT,MPI_STATUS_IGNORE); - MPI_File_write_at_all(outfile,(MPI_Offset)((n_procs + i)*sizeof(int)),&loc_parts.at(i),1,MPI_INT,MPI_STATUS_IGNORE); + int n_loc = (int)loc_parts.at(i); + MPI_File_write_at_all(outfile,(MPI_Offset)(i*sizeof(long)),&offset_domain.at(i),1,MPI_LONG,MPI_STATUS_IGNORE); + MPI_File_write_at_all(outfile,(MPI_Offset)(n_procs *sizeof(long) + i * sizeof(int)),&n_loc,1,MPI_INT,MPI_STATUS_IGNORE); } // go back to start of file @@ -119,21 +121,33 @@ int main (int argc, char** argv) if(infile.eof()) break; std::string str(line); std::istringstream istr(str); - istr >> values[0] >> values[1] >> values[2] >> values[3] >> values[4]; + istr >> blockID >> values[0] >> values[1] >> values[2] >> values[3]; int cell[3]; for (int i = 0; i < 3; ++i) - cell[i] = (int)(values[i+1] / cell_size[i]); + cell[i] = (int)(values[i] / cell_size[i]); int domain = cell[0] * dim[1] * dim[2] + cell[1] * dim[2] + cell[2]; + long block_size = sizeof(long) + 4*sizeof(double); + + MPI_File_write_at_all( + outfile, + (MPI_Offset) + (n_procs*(sizeof(int) + sizeof(long))+(offset_domain.at(domain)+curr_index.at(domain))*block_size), + &blockID, + 1, + MPI_LONG, + MPI_STATUS_IGNORE + ); + MPI_File_write_at_all( outfile, (MPI_Offset) - ((2*n_procs)*sizeof(int)+5*(offset_domain.at(domain)+curr_index.at(domain))*sizeof(double)), + (n_procs*(sizeof(int) + sizeof(long))+(offset_domain.at(domain)+curr_index.at(domain))*block_size+sizeof(long)), values, - 5, + 4, MPI_DOUBLE, MPI_STATUS_IGNORE ); diff --git a/example/read_output.cpp b/example/read_output.cpp new file mode 100644 index 0000000000000000000000000000000000000000..6c6e75a985338fa909be8eba0d5fca1f91f3a821 --- /dev/null +++ b/example/read_output.cpp @@ -0,0 +1,101 @@ +#include <stdlib.h> +#include <iostream> +#include <fstream> +#include <sstream> +#include <string> +#include <vector> +#include <numeric> +#include <mpi.h> + +int main (int argc, char** argv) +{ + MPI_Init(&argc, &argv); + + if (argc < 3) + { + std::cout << "usage: " << argv[0] << " <in_file (ASCII)> <n_p>" << std::endl; + MPI_Finalize(); + exit(-1); + } + + int n = atoi(argv[2]); + + MPI_File infile; + int err; + + err = MPI_File_open( + 
MPI_COMM_WORLD, + argv[1], + MPI_MODE_CREATE | MPI_MODE_RDWR, + MPI_INFO_NULL, + &infile + ); + + double positions[3]; + long blockID; + long offset; + int n_part; + + int blocksize = sizeof(long); // + 3 * sizeof(double); + + for (int d = 0; d < n; ++d) + { + MPI_File_read_at( + infile, + (MPI_Offset)(d * sizeof(int)), + &n_part, + 1, + MPI_INT, + MPI_STATUS_IGNORE + ); + MPI_File_read_at( + infile, + (MPI_Offset)(n * sizeof(int) + d * sizeof(long)), + &offset, + 1, + MPI_LONG, + MPI_STATUS_IGNORE + ); + for (int p = 0; p < n_part; ++p) + { + MPI_File_read_at( + infile, + (MPI_Offset)( + offset + + p * blocksize + ), + &blockID, + 1, + MPI_LONG, + MPI_STATUS_IGNORE + ); + /* + MPI_File_read_at( + infile, + (MPI_Offset)( + offset + + p * blocksize + + sizeof(long) + ), + positions, + 3, + MPI_DOUBLE, + MPI_STATUS_IGNORE + ); + */ + std::cout << "Rank: " << d + << " Offset: " << offset + << " N: " << n_part + << " Particle: " << p + << " ID: " << blockID + /* + << " Position: " << positions[0] << " " + << positions[1] << " " + << positions[2] << " " + */ + << std::endl; + } + } + + MPI_Finalize(); +} diff --git a/include/ALL.hpp b/include/ALL.hpp index 31f9a3fcec514bc1e930abc2374c8661e34b9656..a36bacc0f89865037f63ea03ba88e2202d890e5e 100644 --- a/include/ALL.hpp +++ b/include/ALL.hpp @@ -31,6 +31,7 @@ POSSIBILITY OF SUCH DAMAGE. #ifndef ALL_MAIN_HEADER_INC #define ALL_MAIN_HEADER_INC +#include "ALL_LB.hpp" #include "ALL_Point.hpp" #include "ALL_CustomExceptions.hpp" #include "ALL_Tensor.hpp" @@ -46,6 +47,7 @@ POSSIBILITY OF SUCH DAMAGE. #include <algorithm> #include <numeric> #include <iomanip> +#include <memory> #ifdef ALL_VTK_OUTPUT #include <vtkXMLPUnstructuredGridWriter.h> @@ -84,13 +86,11 @@ template <class T, class W> class ALL public: // constructor (empty) - ALL() : work_array(NULL), + ALL() : #ifdef ALL_VTK_OUTPUT vtk_init(false), #endif - balancer(NULL), - loadbalancing_step(0), - dimension(3) + loadbalancing_step(0) { // determine correct MPI data type for template T if (std::is_same<T,double>::value) mpi_data_type_T = MPI_DOUBLE; @@ -106,23 +106,49 @@ template <class T, class W> class ALL } // constructor (dimension only) - ALL(const int d_, const T g) : ALL() { - gamma = g; - dimension = d_; - outline = new std::vector<T>(2*d_); - estimation_limit = ALL_ESTIMATION_LIMIT_DEFAULT; - } + ALL(ALL_LB_t m, const int d, const T g) : ALL() + { + method = m; + switch(method) + { + case ALL_LB_t::TENSOR: + balancer.reset(new ALL_Tensor_LB<T,W>(d,(W)0,g)); + break; + case ALL_LB_t::STAGGERED: + balancer.reset(new ALL_Staggered_LB<T,W>(d,(W)0,g)); + break; + case ALL_LB_t::UNSTRUCTURED: + balancer.reset(new ALL_Unstructured_LB<T,W>(d,(W)0,g)); + break; + case ALL_LB_t::VORONOI: +#ifdef ALL_VORONOI + balancer.reset(new ALL_Voronoi_LB<T,W>(d,(W)0,g)); +#endif + break; + case ALL_LB_t::HISTOGRAM: + balancer.reset(new ALL_Histogram_LB<T,W>(d,std::vector<W>(10),g)); + break; + default: + throw ALL_Invalid_Argument_Exception( + __FILE__, + __func__, + __LINE__, + "Unknown type of loadbalancing passed."); + } + balancer->set_dimension(d); + balancer->set_gamma(g); + estimation_limit = ALL_ESTIMATION_LIMIT_DEFAULT; + } // constructor (dimension + vertices) - ALL(const int d_, const std::vector<ALL_Point<T>>& inp, const T g) : ALL(d_,g) + ALL(ALL_LB_t method, const int d_, std::vector<ALL_Point<T>>& inp, const T g) : ALL(method, d_,g) { - vertices = inp; - estimation_limit = ALL_ESTIMATION_LIMIT_DEFAULT; + balancer->set_vertices(inp); calculate_outline(); } // destructor - ~ALL(); + ~ALL() = 
default; // overwrite vertices // with a vector @@ -144,58 +170,52 @@ template <class T, class W> class ALL // vector of work (cell based -> work per cell) void set_work(const std::vector<W>&); - // separate method used to create the balancer object (no setup method included) - void create_balancer(ALL_LB_t); - // setup balancer - void setup(ALL_LB_t); + void setup(); // calculate load-balance - void balance(ALL_LB_t method, bool internal = false); + std::vector<ALL_Point<T>>& balance(bool internal = false); // separate call ONLY for the setup method of the balancer - void setup_balancer(ALL_LB_t method); - - // cleanup balancer - void cleanup(ALL_LB_t); + void setup_balancer(); // getter functions // vertices - std::vector<ALL_Point<T>>& get_vertices() {return vertices;}; - std::vector<ALL_Point<T>>& get_result_vertices() {return result_vertices;}; + std::vector<ALL_Point<T>>& get_vertices() {return balancer->get_vertices();}; + std::vector<ALL_Point<T>>& get_result_vertices() {return balancer->get_shifted_vertices();}; // dimension - int get_dimension() {return dimension;} + int get_dimension() {return balancer->get_dimension();} // work void get_work(std::vector<W>&); void get_work(W&); // neighbors - int get_n_neighbors(ALL_LB_t); + int get_n_neighbors(); // provide list of neighbors (ranks) - void get_neighbors(ALL_LB_t,std::vector<int>&); + void get_neighbors(std::vector<int>&); // provide a list of neighbor vertices (VORONOI only) - void get_neighbor_vertices(ALL_LB_t,std::vector<T>&); + void get_neighbor_vertices(std::vector<T>&); // provide list of neighbors in each direction - void get_neighbors(ALL_LB_t,int**); + void get_neighbors(int**); // set system size // (x_min, x_max, y_min, y_max, z_min, z_max) - void set_sys_size(ALL_LB_t,std::vector<T>&); + void set_sys_size(std::vector<T>&); // method to set method specific data, without // the necessity to create dummy hooks for other // methods - void set_method_data(ALL_LB_t,void*); + void set_method_data(void*); // if using a non-cartesian communicator set the processor grid // parameters to create an internal cartesian grid void set_proc_grid_params(const std::vector<int>&, const std::vector<int>&); void set_proc_grid_params(int*, int*); - void set_min_domain_size(ALL_LB_t,T*); + void set_min_domain_size(const std::vector<T>&); #ifdef ALL_VTK_OUTPUT // print outlines and points within domain @@ -205,43 +225,34 @@ template <class T, class W> class ALL #endif private: - ALL_LB_t load_balancer_type; + ALL_LB_t method; // outer cuboid encasing the domain (for cuboid domains identical to domain) // defined by front left lower [0][*] and back upper right points [1][*] // where * is 0,...,dim-1 - std::vector<T>* outline; - - // gamma = correction factor - T gamma; + std::vector<ALL_Point<T>> outline; - // dimension of the system (>=1) - int dimension; +// // gamma = correction factor +// T gamma; +// +// // dimension of the system (>=1) +// int dimension; // balancer - void *balancer; + std::unique_ptr<ALL_LB<T,W>> balancer; // data containing cells (TODO: defintion of data layout, etc.) 
- std::vector<W> *work_array; + std::vector<W> work_array; + // MPI communicator used in library + MPI_Comm communicator; + // number of vertices (for non-cuboid domains) int n_vertices; // number of neighbors int n_neighbors; - // list of vertices - std::vector<ALL_Point<T>> vertices; - - // list of vertices (after a balancing shift) - std::vector<ALL_Point<T>> result_vertices; - - // list of neighbors - std::vector<int> neighbors; - - // global MPI communicator - MPI_Comm communicator; - // calculate the outline of the domain using the vertices void calculate_outline(); @@ -268,117 +279,46 @@ template <class T, class W> class ALL #endif }; -template <class T, class W> ALL<T,W>::~ALL() -{ - delete outline; - delete work_array; - proc_grid_set = false; - - try - { - switch(load_balancer_type) - { - case ALL_LB_t::TENSOR: - delete (ALL_Tensor_LB<T,W>*)balancer; - break; - case ALL_LB_t::STAGGERED: - delete (ALL_Staggered_LB<T,W>*)balancer; - break; - case ALL_LB_t::UNSTRUCTURED: - delete (ALL_Unstructured_LB<T,W>*)balancer; - break; - case ALL_LB_t::VORONOI: -#ifdef ALL_VORONOI - delete (ALL_Voronoi_LB<T,W>*)balancer; -#endif - break; - case ALL_LB_t::HISTOGRAM: - delete (ALL_Histogram_LB<T,W>*)balancer; - break; - default: - throw ALL_Invalid_Argument_Exception( - __FILE__, - __func__, - __LINE__, - "Unknown type of loadbalancing passed."); - } - } - catch (ALL_Custom_Exception e) - { - std::cout << e.what() << std::endl; - MPI_Abort(MPI_COMM_WORLD,-1); - } -} - template <class T, class W> void ALL<T,W>::set_vertices(std::vector<ALL_Point<T>>& inp) { int dim = inp.at(0).get_dimension(); - if (dim != dimension) throw ALL_Point_Dimension_Missmatch_Exception( + if (dim != balancer->get_dimension()) throw ALL_Point_Dimension_Missmatch_Exception( __FILE__, __func__, __LINE__, "Dimension of ALL_Points in input vector do not match dimension of ALL object."); - // clean old vertices (overwrite) - vertices = inp; + balancer->set_vertices(inp); calculate_outline(); } template <class T, class W> void ALL<T,W>::set_vertices(const int n, const int dim, const T* inp) { - if (dim != dimension) throw ALL_Point_Dimension_Missmatch_Exception( + std::vector<ALL_Point<T>> points(n,ALL_Point<T>(dim)); + if (dim != balancer->get_dimension()) throw ALL_Point_Dimension_Missmatch_Exception( __FILE__, __func__, __LINE__, "Dimension of ALL_Points in input vector do not match dimension of ALL object."); - ALL_Point<double> tmp(dim); - vertices = std::vector<ALL_Point<double>>(n,tmp); for (int i = 0; i < n; ++i) { for (int d = 0; d < dim; ++d) { - vertices.at(i).set_coordinate(d,inp[ i * dim + d ]); + points.at(i).set_coordinate(d,inp[ i * dim + d ]); } } + balancer->set_vertices(points); calculate_outline(); } -template <class T, class W> void ALL<T,W>::calculate_outline() -{ - // calculation only possible if there are vertices defining the domain - if (vertices.size() > 0) - { - // setup the outline with the first point - for (int i = 0; i < dimension; ++i) - { - outline->at(0*dimension+i) = outline->at(1*dimension+i) = vertices.at(0).x(i); - } - // compare value of each outline point with all vertices to find the maximum - // extend of the domain in each dimension - for (int i = 1; i < vertices.size(); ++i) - { - ALL_Point<T> p = vertices.at(i); - for( int j = 0; j < dimension; ++j) - { - outline->at(0*dimension+j) = std::min(outline->at(0*dimension+j),p.x(j)); - outline->at(1*dimension+j) = std::max(outline->at(1*dimension+j),p.x(j)); - } - } - } -} - template <class T, class W> void ALL<T,W>::set_work(W work) { - 
// clear work_array - if (work_array) delete work_array; // set new value for work (single value for whole domain) - work_array = new std::vector<W>(1); - work_array->at(0) = work; + balancer->set_work(work); } template <class T, class W> void ALL<T,W>::set_work(const std::vector<W>& work) { - if (work_array) delete work_array; - work_array = new std::vector<W>(work); + balancer->set_work(work); } template <class T, class W> void ALL<T,W>::set_proc_grid_params(const std::vector<int>& loc, const std::vector<int>& size) @@ -390,9 +330,9 @@ template <class T, class W> void ALL<T,W>::set_proc_grid_params(const std::vecto template <class T, class W> void ALL<T,W>::set_proc_grid_params(int* loc, int* size) { - proc_grid_loc.resize(dimension); - proc_grid_dim.resize(dimension); - for (int i = 0; i < dimension; ++i) + proc_grid_loc.resize(balancer->get_dimension()); + proc_grid_dim.resize(balancer->get_dimension()); + for (int i = 0; i < balancer->get_dimension(); ++i) { proc_grid_loc.at(i) = loc[i]; proc_grid_dim.at(i) = size[i]; @@ -406,17 +346,17 @@ template <class T, class W> void ALL<T,W>::set_communicator(MPI_Comm comm) if (MPI_Topo_test(comm, &comm_type) == MPI_CART) { communicator = comm; + balancer->set_communicator(communicator); } else { int rank; MPI_Comm_rank(comm, &rank); - if (rank == 0) std::cout << "creating internal cartesian communicator from process grid data" << std::endl; if (proc_grid_set) { // compute 1D index from the location in the process grid (using z-y-x ordering, as MPI_Dims_create) int idx; - switch (dimension) + switch (balancer->get_dimension()) { case 3: idx = proc_grid_loc.at(2) + @@ -446,7 +386,8 @@ template <class T, class W> void ALL<T,W>::set_communicator(MPI_Comm comm) std::vector<int> periods(3,1); // transform temporary communicator to cartesian communicator - MPI_Cart_create(temp_comm, dimension, proc_grid_dim.data(), periods.data(), 0, &communicator); + MPI_Cart_create(temp_comm, balancer->get_dimension(), proc_grid_dim.data(), periods.data(), 0, &communicator); + balancer->set_communicator(communicator); } else { @@ -459,141 +400,60 @@ template <class T, class W> void ALL<T,W>::set_communicator(MPI_Comm comm) } } -template <class T, class W> void ALL<T,W>::get_work(W& result) +template <class T, class W> void ALL<T,W>::calculate_outline() { - result = work_array->at(0); + // calculation only possible if there are vertices defining the domain + if (balancer->get_vertices().size() > 0) + { + outline.resize(2); + // setup the outline with the first point + for (int i = 0; i < balancer->get_dimension(); ++i) + { + outline.at(0) = balancer->get_vertices().at(0); + outline.at(1) = balancer->get_vertices().at(0); + } + // compare value of each outline point with all vertices to find the maximum + // extend of the domain in each dimension + for (int i = 1; i < balancer->get_vertices().size(); ++i) + { + ALL_Point<T> p = balancer->get_vertices().at(i); + for( int j = 0; j < balancer->get_dimension(); ++j) + { + outline.at(0).set_coordinate(j, std::min(outline.at(0).x(j),p.x(j))); + outline.at(1).set_coordinate(j, std::max(outline.at(1).x(j),p.x(j))); + } + } + } } -template <class T, class W> void ALL<T,W>::get_work(std::vector<W>& result) +template <class T, class W> void ALL<T,W>::get_work(W& result) { - result = work_array; + result = balancer->get_work().at(0); } -template <class T, class W> void ALL<T,W>::create_balancer(ALL_LB_t method) +template <class T, class W> void ALL<T,W>::get_work(std::vector<W>& result) { - try - { - switch(method) - { - case 
ALL_LB_t::TENSOR: - balancer = new ALL_Tensor_LB<T,W>(dimension,work_array->at(0),gamma); - break; - case ALL_LB_t::STAGGERED: - balancer = new ALL_Staggered_LB<T,W>(dimension,work_array->at(0),gamma); - break; - case ALL_LB_t::UNSTRUCTURED: - balancer = new ALL_Unstructured_LB<T,W>(dimension,work_array->at(0),gamma); - break; - case ALL_LB_t::VORONOI: -#ifdef ALL_VORONOI - balancer = new ALL_Voronoi_LB<T,W>(dimension,work_array->at(0),gamma); -#endif - break; - case ALL_LB_t::HISTOGRAM: - balancer = new ALL_Histogram_LB<T,W>(dimension,work_array->at(0),gamma); - break; - default: - throw ALL_Invalid_Argument_Exception( - __FILE__, - __func__, - __LINE__, - "Unknown type of loadbalancing passed."); - break; - } - } - catch (ALL_Custom_Exception e) - { - std::cout << e.what() << std::endl; - MPI_Abort(MPI_COMM_WORLD,-1); - } + result = balancer->get_work(); } -template <class T, class W> void ALL<T,W>::setup(ALL_LB_t method) +template <class T, class W> void ALL<T,W>::setup() { try { - switch(method) - { - case ALL_LB_t::TENSOR: - if (balancer) delete (ALL_Tensor_LB<T,W>*)balancer; - balancer = new ALL_Tensor_LB<T,W>(dimension,work_array->at(0),gamma); - ((ALL_Tensor_LB<T,W>*)balancer)->set_vertices(outline->data()); - ((ALL_Tensor_LB<T,W>*)balancer)->setup(communicator); - break; - case ALL_LB_t::STAGGERED: - if (balancer) delete (ALL_Staggered_LB<T,W>*)balancer; - balancer = new ALL_Staggered_LB<T,W>(dimension,work_array->at(0),gamma); - ((ALL_Staggered_LB<T,W>*)balancer)->set_vertices(outline->data()); - ((ALL_Staggered_LB<T,W>*)balancer)->setup(communicator); - break; - case ALL_LB_t::UNSTRUCTURED: - if (balancer) delete (ALL_Unstructured_LB<T,W>*)balancer; - balancer = new ALL_Unstructured_LB<T,W>(dimension,work_array->at(0),gamma); - ((ALL_Unstructured_LB<T,W>*)balancer)->set_vertices(vertices); - ((ALL_Unstructured_LB<T,W>*)balancer)->setup(communicator); - break; - case ALL_LB_t::VORONOI: -#ifdef ALL_VORONOI - if (balancer) delete (ALL_Voronoi_LB<T,W>*)balancer; - balancer = new ALL_Voronoi_LB<T,W>(dimension,work_array->at(0),gamma); - ((ALL_Voronoi_LB<T,W>*)balancer)->set_vertices(vertices); - ((ALL_Voronoi_LB<T,W>*)balancer)->setup(communicator); -#endif - break; - case ALL_LB_t::HISTOGRAM: - if (balancer) delete (ALL_Histogram_LB<T,W>*)balancer; - balancer = new ALL_Histogram_LB<T,W>(dimension,work_array,gamma); - ((ALL_Histogram_LB<T,W>*)balancer)->set_vertices(outline->data()); - ((ALL_Histogram_LB<T,W>*)balancer)->setup(communicator); - break; - default: - throw ALL_Invalid_Argument_Exception( - __FILE__, - __func__, - __LINE__, - "Unknown type of loadbalancing passed."); - break; - } + balancer->setup(); } catch (ALL_Custom_Exception e) { std::cout << e.what() << std::endl; MPI_Abort(MPI_COMM_WORLD,-1); } - load_balancer_type = method; } -template <class T, class W> void ALL<T,W>::setup_balancer(ALL_LB_t method) +template <class T, class W> void ALL<T,W>::setup_balancer() { try { - switch(method) - { - case ALL_LB_t::TENSOR: - ((ALL_Tensor_LB<T,W>*)balancer)->setup(communicator); - break; - case ALL_LB_t::STAGGERED: - ((ALL_Staggered_LB<T,W>*)balancer)->setup(communicator); - break; - case ALL_LB_t::UNSTRUCTURED: - ((ALL_Unstructured_LB<T,W>*)balancer)->setup(communicator); - break; - case ALL_LB_t::VORONOI: -#ifdef ALL_VORONOI - ((ALL_Voronoi_LB<T,W>*)balancer)->setup(communicator); -#endif - break; - case ALL_LB_t::HISTOGRAM: - ((ALL_Histogram_LB<T,W>*)balancer)->setup(communicator); - break; - default: - throw ALL_Invalid_Argument_Exception( - __FILE__, - __func__, - 
__LINE__, - "Unknown type of loadbalancing passed."); - break; - } + balancer->setup(communicator); } catch (ALL_Custom_Exception e) { @@ -602,148 +462,97 @@ template <class T, class W> void ALL<T,W>::setup_balancer(ALL_LB_t method) } } -template <class T, class W> void ALL<T,W>::balance(ALL_LB_t method, bool internal) +template <class T, class W> std::vector<ALL_Point<T>>& ALL<T,W>::balance(bool internal) { try { - n_vertices = vertices.size(); - /* - n_vertices = 2; - if (method == ALL_LB_t::UNSTRUCTURED) - { - n_vertices = 8; - } - */ - std::vector<T> result(n_vertices*dimension); + n_vertices = balancer->get_vertices().size(); switch(method) { case ALL_LB_t::TENSOR: - ((ALL_Tensor_LB<T,W>*)balancer)->set_vertices(outline->data()); - result_vertices = vertices; - ((ALL_Tensor_LB<T,W>*)balancer)->balance(); - - ((ALL_Tensor_LB<T,W>*)balancer)->get_shifted_vertices(result); - - for (int i = 0; i < result_vertices.size(); ++i) - { - for (int d = 0; d < result_vertices.at(i).get_dimension(); ++d) - { - result_vertices.at(i).set_coordinate( - d, - result.at(i*result_vertices.at(i).get_dimension()+d) - ); - } - } - + balancer->balance(loadbalancing_step); break; case ALL_LB_t::STAGGERED: + + /* ToDo: Check if repetition is useful at all and + /* change it to be included for all methods, + * not only STAGGERED */ + /* + // estimation to improve speed of convergence + // changing vertices during estimation + std::vector<ALL_Point<T>> tmp_vertices = balancer->get_vertices(); + // saved original vertices + std::vector<ALL_Point<T>> old_vertices = balancer->get_vertices(); + // old temporary vertices + std::vector<ALL_Point<T>> old_tmp_vertices = balancer->get_vertices(); + // result temporary vertices + std::vector<ALL_Point<T>> result_vertices = balancer->get_vertices(); + // temporary work + W tmp_work = balancer->get_work().at(0); + // old temporary work + W old_tmp_work = balancer->get_work().at(0); + + W max_work; + W sum_work; + double d_max; + int n_ranks; + + // compute work density on local domain + double V = 1.0; + for (int i = 0; i < balancer->get_dimension(); ++i) + V *= ( outline.at(1).x(i) - outline.at(0).x(i) ); + double rho = work_array.at(0) / V; + + // collect maximum work in system + MPI_Allreduce(work_array.data(), &max_work, 1, mpi_data_type_W, MPI_MAX, communicator); + MPI_Allreduce(work_array.data(), &sum_work, 1, mpi_data_type_W, MPI_SUM, communicator); + MPI_Comm_size(communicator,&n_ranks); + d_max = (double)(max_work) * (double)n_ranks / (double)sum_work - 1.0; + + + for (int i = 0; i < estimation_limit && d_max > 0.1 && !internal; ++i) { - // estimation to improve speed of convergence - // changing vertices during estimation - std::vector<ALL_Point<T>> tmp_vertices = vertices; - // saved original vertices - std::vector<ALL_Point<T>> old_vertices = vertices; - // old temporary vertices - std::vector<ALL_Point<T>> old_tmp_vertices = vertices; - // temporary work - W tmp_work = work_array->at(0); - // old temporary work - W old_tmp_work = work_array->at(0); - - W max_work; - W sum_work; - double d_max; - int n_ranks; - - // compute work density on local domain - double V = 1.0; - for (int i = 0; i < dimension; ++i) - V *= ( outline->at(3+i) - outline->at(i) ); - double rho = work_array->at(0) / V; - - // collect maximum work in system - MPI_Allreduce(work_array->data(), &max_work, 1, mpi_data_type_W, MPI_MAX, communicator); - MPI_Allreduce(work_array->data(), &sum_work, 1, mpi_data_type_W, MPI_SUM, communicator); - MPI_Comm_size(communicator,&n_ranks); - d_max = 
(double)(max_work) * (double)n_ranks / (double)sum_work - 1.0; - - - for (int i = 0; i < estimation_limit && d_max > 0.1 && !internal; ++i) + balancer->set_work(tmp_work); + balancer->setup(); + balancer->balance(true); + bool sane = true; + // check if the estimated boundaries are not too deformed + for (int j = 0; j < balancer->get_dimension(); ++j) + sane = sane && (result_vertices.at(0).x(j) <= old_vertices.at(1).x(j)); + MPI_Allreduce(MPI_IN_PLACE,&sane,1,MPI_CXX_BOOL,MPI_LAND,communicator); + if (sane) { - work_array->at(0) = tmp_work; - setup(method); - balance(method,true); - bool sane = true; - // check if the estimated boundaries are not too deformed - for (int j = 0; j < dimension; ++j) - sane = sane && (result_vertices.at(0).x(j) <= old_vertices.at(1).x(j)); - MPI_Allreduce(MPI_IN_PLACE,&sane,1,MPI_CXX_BOOL,MPI_LAND,communicator); - if (sane) - { - old_tmp_vertices = tmp_vertices; - tmp_vertices = result_vertices; - V = 1.0; - for (int i = 0; i < dimension; ++i) - V *= ( tmp_vertices.at(1).x(i) - tmp_vertices.at(0).x(i) ); - old_tmp_work = tmp_work; - tmp_work = rho * V; - } - else if (!sane || i == estimation_limit - 1) - { - vertices = old_tmp_vertices; - work_array->at(0) = old_tmp_work; - calculate_outline(); - i = estimation_limit; - } + old_tmp_vertices = tmp_vertices; + tmp_vertices = result_vertices; + V = 1.0; + for (int i = 0; i < balancer->get_dimension(); ++i) + V *= ( tmp_vertices.at(1).x(i) - tmp_vertices.at(0).x(i) ); + old_tmp_work = tmp_work; + tmp_work = rho * V; } - - ((ALL_Staggered_LB<T,W>*)balancer)->set_vertices(outline->data()); - result_vertices = vertices; - ((ALL_Staggered_LB<T,W>*)balancer)->balance(); - - ((ALL_Staggered_LB<T,W>*)balancer)->get_shifted_vertices(result); - for (int i = 0; i < result_vertices.size(); ++i) + else if (!sane || i == estimation_limit - 1) { - for (int d = 0; d < result_vertices.at(i).get_dimension(); ++d) - { - result_vertices.at(i).set_coordinate( - d, - result.at(i*result_vertices.at(i).get_dimension()+d) - ); - } + balancer->get_vertices() = old_tmp_vertices; + work_array->at(0) = old_tmp_work; + calculate_outline(); + i = estimation_limit; } } + + ((ALL_Staggered_LB<T,W>*)balancer.get())->set_vertices(outline->data()); + */ + balancer->balance(loadbalancing_step); break; case ALL_LB_t::UNSTRUCTURED: - ((ALL_Unstructured_LB<T,W>*)balancer)->set_vertices(vertices); - result_vertices = vertices; - ((ALL_Unstructured_LB<T,W>*)balancer)->balance(); - ((ALL_Unstructured_LB<T,W>*)balancer)->get_shifted_vertices(result_vertices); + balancer->balance(loadbalancing_step); break; case ALL_LB_t::VORONOI: #ifdef ALL_VORONOI - ((ALL_Voronoi_LB<T,W>*)balancer)->set_vertices(vertices); - ((ALL_Voronoi_LB<T,W>*)balancer)->set_step(loadbalancing_step); - result_vertices = vertices; - ((ALL_Voronoi_LB<T,W>*)balancer)->balance(); - ((ALL_Voronoi_LB<T,W>*)balancer)->get_shifted_vertices(result_vertices); + balancer->balance(loadbalancing_step); #endif break; case ALL_LB_t::HISTOGRAM: - ((ALL_Histogram_LB<T,W>*)balancer)->set_vertices(outline->data()); - result_vertices = vertices; - ((ALL_Histogram_LB<T,W>*)balancer)->balance(loadbalancing_step); - ((ALL_Histogram_LB<T,W>*)balancer)->get_shifted_vertices(result); - for (int i = 0; i < result_vertices.size(); ++i) - { - for (int d = 0; d < result_vertices.at(i).get_dimension(); ++d) - { - result_vertices.at(i).set_coordinate( - d, - result.at(i*result_vertices.at(i).get_dimension()+d) - ); - } - } + balancer->balance(loadbalancing_step); break; default: throw 
ALL_Invalid_Argument_Exception( @@ -751,7 +560,6 @@ template <class T, class W> void ALL<T,W>::balance(ALL_LB_t method, bool interna __func__, __LINE__, "Unknown type of loadbalancing passed."); - break; } loadbalancing_step++; } @@ -760,79 +568,16 @@ template <class T, class W> void ALL<T,W>::balance(ALL_LB_t method, bool interna std::cout << e.what() << std::endl; MPI_Abort(MPI_COMM_WORLD,-1); } -} + return balancer->get_shifted_vertices(); -template <class T, class W> void ALL<T,W>::cleanup(ALL_LB_t method) -{ - try - { - switch(method) - { - case ALL_LB_t::TENSOR: - delete((ALL_Tensor_LB<T,W>*)balancer); - break; - case ALL_LB_t::STAGGERED: - delete((ALL_Staggered_LB<T,W>*)balancer); - break; - case ALL_LB_t::UNSTRUCTURED: - delete((ALL_Unstructured_LB<T,W>*)balancer); - break; - case ALL_LB_t::VORONOI: -#ifdef ALL_VORONOI - delete((ALL_Voronoi_LB<T,W>*)balancer); -#endif - break; - case ALL_LB_t::HISTOGRAM: - delete((ALL_Histogram_LB<T,W>*)balancer); - break; - default: - throw ALL_Invalid_Argument_Exception( - __FILE__, - __func__, - __LINE__, - "Unknown type of loadbalancing passed."); - break; - } - } - catch(ALL_Custom_Exception e) - { - std::cout << e.what() << std::endl; - MPI_Abort(MPI_COMM_WORLD,-1); - } } -template <class T, class W> int ALL<T,W>::get_n_neighbors(ALL_LB_t method) +template <class T, class W> int ALL<T,W>::get_n_neighbors() { std::vector<int> neig; try { - switch(method) - { - case ALL_LB_t::TENSOR: - ((ALL_Tensor_LB<T,W>*)balancer)->get_neighbors(neig); - break; - case ALL_LB_t::STAGGERED: - ((ALL_Staggered_LB<T,W>*)balancer)->get_neighbors(neig); - break; - case ALL_LB_t::UNSTRUCTURED: - ((ALL_Unstructured_LB<T,W>*)balancer)->get_neighbors(neig); - break; - case ALL_LB_t::VORONOI: -#ifdef ALL_VORONOI - ((ALL_Voronoi_LB<T,W>*)balancer)->get_neighbors(neig); -#endif - break; - case ALL_LB_t::HISTOGRAM: - ((ALL_Histogram_LB<T,W>*)balancer)->get_neighbors(neig); - break; - default: - throw ALL_Invalid_Argument_Exception( - __FILE__, - __func__, - __LINE__, - "Unknown type of loadbalancing passed."); - break; - } + balancer->get_neighbors(neig); } catch(ALL_Custom_Exception e) { @@ -842,7 +587,7 @@ template <class T, class W> int ALL<T,W>::get_n_neighbors(ALL_LB_t method) return neig.size(); } -template <class T, class W> void ALL<T,W>::get_neighbor_vertices(ALL_LB_t method, std::vector<T>& nv) +template <class T, class W> void ALL<T,W>::get_neighbor_vertices(std::vector<T>& nv) { switch(method) { @@ -854,7 +599,7 @@ template <class T, class W> void ALL<T,W>::get_neighbor_vertices(ALL_LB_t method break; case ALL_LB_t::VORONOI: #ifdef ALL_VORONOI - ((ALL_Voronoi_LB<T,W>*)balancer)->get_neighbor_vertices(nv); + ((ALL_Voronoi_LB<T,W>*)balancer.get())->get_neighbor_vertices(nv); #endif break; case ALL_LB_t::HISTOGRAM: @@ -864,37 +609,11 @@ template <class T, class W> void ALL<T,W>::get_neighbor_vertices(ALL_LB_t method } } -template <class T, class W> void ALL<T,W>::set_min_domain_size(ALL_LB_t method, T* min_size) +template <class T, class W> void ALL<T,W>::set_min_domain_size(const std::vector<T>& min_size) { try { - switch(method) - { - case ALL_LB_t::TENSOR: - //((ALL_Tensor_LB<T,W>*)balancer)->get_neighbors(neig); - break; - case ALL_LB_t::STAGGERED: - ((ALL_Staggered_LB<T,W>*)balancer)->set_min_domain_size(min_size); - break; - case ALL_LB_t::UNSTRUCTURED: - //((ALL_Unstructured_LB<T,W>*)balancer)->get_neighbors(neig); - break; - case ALL_LB_t::VORONOI: -#ifdef ALL_VORONOI - //((ALL_Voronoi_LB<T,W>*)balancer)->get_neighbors(neig); -#endif - break; - case 
ALL_LB_t::HISTOGRAM: - //((ALL_Histogram_LB<T,W>*)balancer)->get_neighbors(neig); - break; - default: - throw ALL_Invalid_Argument_Exception( - __FILE__, - __func__, - __LINE__, - "Unknown type of loadbalancing passed."); - break; - } + (balancer.get())->set_min_domain_size(min_size); } catch(ALL_Custom_Exception e) { @@ -903,37 +622,11 @@ template <class T, class W> void ALL<T,W>::set_min_domain_size(ALL_LB_t method, } } -template <class T, class W> void ALL<T,W>::get_neighbors(ALL_LB_t method, std::vector<int>& neig) +template <class T, class W> void ALL<T,W>::get_neighbors(std::vector<int>& neig) { try { - switch(method) - { - case ALL_LB_t::TENSOR: - ((ALL_Tensor_LB<T,W>*)balancer)->get_neighbors(neig); - break; - case ALL_LB_t::STAGGERED: - ((ALL_Staggered_LB<T,W>*)balancer)->get_neighbors(neig); - break; - case ALL_LB_t::UNSTRUCTURED: - ((ALL_Unstructured_LB<T,W>*)balancer)->get_neighbors(neig); - break; - case ALL_LB_t::VORONOI: -#ifdef ALL_VORONOI - ((ALL_Voronoi_LB<T,W>*)balancer)->get_neighbors(neig); -#endif - break; - case ALL_LB_t::HISTOGRAM: - ((ALL_Histogram_LB<T,W>*)balancer)->get_neighbors(neig); - break; - default: - throw ALL_Invalid_Argument_Exception( - __FILE__, - __func__, - __LINE__, - "Unknown type of loadbalancing passed."); - break; - } + balancer->get_neighbors(neig); } catch(ALL_Custom_Exception e) { @@ -942,37 +635,11 @@ template <class T, class W> void ALL<T,W>::get_neighbors(ALL_LB_t method, std::v } } -template <class T, class W> void ALL<T,W>::get_neighbors(ALL_LB_t method, int** neig) +template <class T, class W> void ALL<T,W>::get_neighbors(int** neig) { try { - switch(method) - { - case ALL_LB_t::TENSOR: - ((ALL_Tensor_LB<T,W>*)balancer)->get_neighbors(neig); - break; - case ALL_LB_t::STAGGERED: - ((ALL_Staggered_LB<T,W>*)balancer)->get_neighbors(neig); - break; - case ALL_LB_t::UNSTRUCTURED: - ((ALL_Unstructured_LB<T,W>*)balancer)->get_neighbors(neig); - break; - case ALL_LB_t::VORONOI: -#ifdef ALL_VORONOI - ((ALL_Voronoi_LB<T,W>*)balancer)->get_neighbors(neig); -#endif - break; - case ALL_LB_t::HISTOGRAM: - ((ALL_Histogram_LB<T,W>*)balancer)->get_neighbors(neig); - break; - default: - throw ALL_Invalid_Argument_Exception( - __FILE__, - __func__, - __LINE__, - "Unknown type of loadbalancing passed."); - break; - } + balancer->get_neighbors(neig); } catch(ALL_Custom_Exception e) { @@ -981,53 +648,14 @@ template <class T, class W> void ALL<T,W>::get_neighbors(ALL_LB_t method, int** } } -template <class T, class W> void ALL<T,W>::set_sys_size(ALL_LB_t method, std::vector<T>& s_size) +template <class T, class W> void ALL<T,W>::set_sys_size(std::vector<T>& s_size) { - switch(method) - { - case ALL_LB_t::TENSOR: - break; - case ALL_LB_t::STAGGERED: - break; - case ALL_LB_t::UNSTRUCTURED: - break; - case ALL_LB_t::VORONOI: -#ifdef ALL_VORONOI - ((ALL_Voronoi_LB<T,W>*)balancer)->set_sys_size(s_size); -#endif - break; - case ALL_LB_t::HISTOGRAM: - ((ALL_Histogram_LB<T,W>*)balancer)->set_sys_size(s_size); - break; - default: - throw ALL_Invalid_Argument_Exception( - __FILE__, - __func__, - __LINE__, - "Unknown type of loadbalancing passed."); - break; - } - + balancer.get()->set_sys_size(s_size); } -template <class T, class W> void ALL<T,W>::set_method_data(ALL_LB_t method, void* data) +template <class T, class W> void ALL<T,W>::set_method_data(void* data) { - switch(method) - { - case ALL_LB_t::TENSOR: - break; - case ALL_LB_t::STAGGERED: - break; - case ALL_LB_t::UNSTRUCTURED: - break; -#ifdef ALL_VORONOI - case ALL_LB_t::VORONOI: - break; -#endif - case 
ALL_LB_t::HISTOGRAM: - ((ALL_Histogram_LB<T,W>*)balancer)->set_data(data); - break; - } + balancer.get()->set_data(data); } #ifdef ALL_VTK_OUTPUT @@ -1036,7 +664,7 @@ template <class T, class W> void ALL<T,W>::print_vtk_outlines(int step) // define grid points, i.e. vertices of local domain auto points = vtkSmartPointer<vtkPoints>::New(); // allocate space for eight points (describing the cuboid around the domain) - points->Allocate(8 * dimension); + points->Allocate(8 * balancer->get_dimension()); int n_ranks; int local_rank; @@ -1053,8 +681,8 @@ template <class T, class W> void ALL<T,W>::print_vtk_outlines(int step) MPI_Comm_size(communicator,&n_ranks); std::vector<ALL_Point<W>> tmp_outline(2); - tmp_outline.at(0) = ALL_Point<W>(3,outline->data()); - tmp_outline.at(1) = ALL_Point<W>(3,outline->data()+3); + tmp_outline.at(0) = ALL_Point<W>(3,{outline.at(0).x(0),outline.at(0).x(1),outline.at(0).x(2)}); + tmp_outline.at(1) = ALL_Point<W>(3,{outline.at(1).x(0),outline.at(1).x(1),outline.at(1).x(2)}); for (auto z = 0; z <= 1; ++z) for (auto y = 0; y <= 1; ++y) @@ -1078,7 +706,7 @@ template <class T, class W> void ALL<T,W>::print_vtk_outlines(int step) work->SetNumberOfComponents(1); work->SetNumberOfTuples(1); work->SetName("work"); - W total_work = std::accumulate(work_array->begin(),work_array->end(),(W)0); + W total_work = std::accumulate(balancer->get_work().begin(),balancer->get_work().end(),(W)0); work->SetValue(0, total_work); // determine extent of system @@ -1092,8 +720,8 @@ template <class T, class W> void ALL<T,W>::print_vtk_outlines(int step) for(int i = 0; i < 3; ++i) { - local_min[i] = outline->at(i); - local_max[i] = outline->at(3+i); + local_min[i] = outline.at(0).x(i); + local_max[i] = outline.at(1).x(i); width[i] = local_max[i] - local_min[i]; volume *= width[i]; } @@ -1173,26 +801,26 @@ template <class T, class W> void ALL<T,W>::print_vtk_vertices(int step) // local vertices // (vertices + work) - T local_vertices[n_vertices * dimension + 1]; + T local_vertices[n_vertices * balancer->get_dimension() + 1]; for (int v = 0; v < n_vertices; ++v) { - for (int d = 0; d < dimension; ++d) + for (int d = 0; d < balancer->get_dimension(); ++d) { - local_vertices[v * dimension + d] = vertices.at(v).x(d); + local_vertices[v * balancer->get_dimension() + d] = balancer->get_vertices().at(v).x(d); } } - local_vertices[n_vertices * dimension] = (T)work_array->at(0); + local_vertices[n_vertices * balancer->get_dimension()] = (T)balancer->get_work().at(0); T* global_vertices; if (local_rank == 0) { - global_vertices = new T[n_ranks * (n_vertices * dimension + 1)]; + global_vertices = new T[n_ranks * (n_vertices * balancer->get_dimension() + 1)]; } // collect all works and vertices on a single process - MPI_Gather(local_vertices, n_vertices * dimension + 1, mpi_data_type_T, - global_vertices,n_vertices * dimension + 1, mpi_data_type_T, + MPI_Gather(local_vertices, n_vertices * balancer->get_dimension() + 1, mpi_data_type_T, + global_vertices,n_vertices * balancer->get_dimension() + 1, mpi_data_type_T, 0, communicator ); @@ -1207,9 +835,9 @@ template <class T, class W> void ALL<T,W>::print_vtk_vertices(int step) for (int v = 0; v < n_vertices; ++v) { vtkpoints->InsertNextPoint( - global_vertices[ i * (n_vertices * dimension + 1) + v * dimension + 0 ], - global_vertices[ i * (n_vertices * dimension + 1) + v * dimension + 1 ], - global_vertices[ i * (n_vertices * dimension + 1) + v * dimension + 2 ] + global_vertices[ i * (n_vertices * balancer->get_dimension() + 1) + v * 
balancer->get_dimension() + 0 ], + global_vertices[ i * (n_vertices * balancer->get_dimension() + 1) + v * balancer->get_dimension() + 1 ], + global_vertices[ i * (n_vertices * balancer->get_dimension() + 1) + v * balancer->get_dimension() + 2 ] ); } } @@ -1266,8 +894,8 @@ template <class T, class W> void ALL<T,W>::print_vtk_vertices(int step) unstructuredGrid->InsertNextCell(VTK_POLYHEDRON, 8, pointIds, 12, faces->GetPointer()); - work->SetValue(n,global_vertices[ n * (n_vertices * dimension + 1) + - 8 * dimension ]); + work->SetValue(n,global_vertices[ n * (n_vertices * balancer->get_dimension() + 1) + + 8 * balancer->get_dimension() ]); cell->SetValue(n,(T)n); } unstructuredGrid->GetCellData()->AddArray(work); diff --git a/include/ALL_Histogram.hpp b/include/ALL_Histogram.hpp index 8f2213e69bee9b68a96a69157a1bd6deebec3ff8..6e9edd77aad9a1c22571f89a1654708825e8767b 100644 --- a/include/ALL_Histogram.hpp +++ b/include/ALL_Histogram.hpp @@ -34,96 +34,53 @@ POSSIBILITY OF SUCH DAMAGE. #include <mpi.h> #include <exception> #include <vector> +#include "ALL_LB.hpp" #include "ALL_CustomExceptions.hpp" // T: data type of vertices used in the balancer // W: data type of the work used in the balancer -template <class T, class W> class ALL_Histogram_LB +template <class T, class W> class ALL_Histogram_LB : public ALL_LB<T,W> { public: ALL_Histogram_LB() {} - ALL_Histogram_LB(int d, std::vector<W>* w, T g) : dimension(d), - work(*w), - gamma(g) + ALL_Histogram_LB(int d, std::vector<W> w, T g) : + ALL_LB<T,W>(d, g) { + this->set_work(w); // need the lower and upper bounds // of the domain for the tensor-based // load-balancing scheme - vertices = new std::vector<T>(2*dimension); - shifted_vertices = new std::vector<T>(2*dimension); - // periodicity of the MPI communicator / system - periodicity = new int[dimension]; - + // array of MPI communicators for each direction (to collect work // on each plane) - communicators = new MPI_Comm[2*dimension]; - n_neighbors = new std::vector<int>(2*dimension); - sys_size.resize(2*dimension); - n_bins = new std::vector<int>(dimension); + communicators.resize(2*this->dimension); + n_neighbors.resize(2*this->dimension); + n_bins.resize(this->dimension); } - ~ALL_Histogram_LB(); - - void set_vertices(T*); + ~ALL_Histogram_LB() = default; // setup communicators for the exchange of work in each domain - void setup(MPI_Comm); + void setup() override; // do a load-balancing step - virtual void balance(int); + void balance(int) override; // getter for variables (passed by reference to avoid // direct access to private members of the object) - // getter for base vertices - void get_vertices(T*); - // getter for shifted vertices - void get_shifted_vertices(T*); - // getter for shifted vertices - void get_shifted_vertices(std::vector<T>&); - // neighbors // provide list of neighbors - virtual void get_neighbors(std::vector<int>&); + void get_neighbors(std::vector<int>&) override; // provide list of neighbors in each direction - virtual void get_neighbors(int**); - - // set system size for computation of histogram slice thickness - void set_sys_size(std::vector<T>&); + void get_neighbors(int**) override; // set required data for the method: // int, int, int (n_bins for each direction) - void set_data(void*); + virtual void set_data(void*) override; private: - // dimension of the system - int dimension; - - // shift cooeficient - T gamma; - - // work distribution - std::vector<W> work; - - // vertices - std::vector<T>* vertices; - - // vertices after load-balancing shift - 
std::vector<T>* shifted_vertices; - - // MPI values - // global communicator - MPI_Comm global_comm; - // rank of the local domain in the communicator - int local_rank; - // global dimension of the system in each coordinate - int* global_dims; - // coordinates of the local domain in the system - int* local_coords; - // periodicity of the MPI communicator / system - int* periodicity; - - std::vector<int>* n_bins; + std::vector<int> n_bins; // type for MPI communication MPI_Datatype mpi_data_type_T; @@ -131,93 +88,38 @@ template <class T, class W> class ALL_Histogram_LB // array of MPI communicators for each direction (to collect work // on each plane) - MPI_Comm* communicators; + std::vector<MPI_Comm> communicators; // list of neighbors std::vector<int> neighbors; - std::vector<int>* n_neighbors; - - // size of system - std::vector<T> sys_size; + std::vector<int> n_neighbors; // find neighbors (more sophisticated than for tensor-product // variant of LB) void find_neighbors(); }; -template <class T, class W> ALL_Histogram_LB<T,W>::~ALL_Histogram_LB() -{ - if (vertices) delete vertices; - if (shifted_vertices) delete shifted_vertices; - if (global_dims) delete[] global_dims; - if (local_coords) delete[] local_coords; - if (periodicity) delete[] periodicity; - if (communicators) delete[] communicators; - if (n_neighbors) delete n_neighbors; - if (n_bins) delete n_bins; -} - -template <class T, class W> void ALL_Histogram_LB<T,W>::get_vertices(T* result) -{ - for (int i = 0; i < 2*dimension; ++i) - { - result[i] = vertices->at(i); - } -} - -template <class T, class W> void ALL_Histogram_LB<T,W>::get_shifted_vertices(T* result) -{ - for (int i = 0; i < 2*dimension; ++i) - { - result[i] = shifted_vertices->at(i); - } -} - -template <class T, class W> void ALL_Histogram_LB<T,W>::get_shifted_vertices(std::vector<T>& result) -{ - for (int i = 0; i < 2*dimension; ++i) - { - result.at(i) = shifted_vertices->at(i); - } -} - -// set the actual vertices (unsafe due to array copy) -template <class T, class W> void ALL_Histogram_LB<T,W>::set_vertices(T* v) -{ - for (int i = 0; i < 2*dimension; ++i) - { - vertices->at(i) = v[i]; - } -} - -template <class T, class W> void ALL_Histogram_LB<T,W>::set_sys_size(std::vector<T>& ss) -{ - for (auto i = 0; i < sys_size.size(); ++i) - { - sys_size.at(i) = ss.at(i); - } -} - template <class T, class W> void ALL_Histogram_LB<T,W>::set_data(void* data) { - if (n_bins->size() < 3) - n_bins->resize(3); + if (n_bins.size() < 3) + n_bins.resize(3); + for (int i = 0; i < 3; ++i) - n_bins->at(i) = *((int*)data+i); + n_bins.at(i) = *((int*)data+i); } // setup routine for the tensor-based load-balancing scheme // requires: -// global_comm (int): cartesian MPI communicator, from +// this->global_comm (int): cartesian MPI communicator, from // which separate sub communicators // are derived in order to represent // each plane of domains in the system -template <class T, class W> void ALL_Histogram_LB<T,W>::setup(MPI_Comm comm) +template <class T, class W> void ALL_Histogram_LB<T,W>::setup() { int status; // check if Communicator is cartesian - MPI_Topo_test(comm, &status); + MPI_Topo_test(this->global_comm, &status); if (status != MPI_CART) { throw ALL_Invalid_Comm_Type_Exception( @@ -226,20 +128,11 @@ template <class T, class W> void ALL_Histogram_LB<T,W>::setup(MPI_Comm comm) __LINE__, "Cartesian MPI communicator required, passed communicator is not cartesian"); } - - // allocate arrays to correct sizes - global_dims = new int[dimension]; - local_coords = new int[dimension]; - 
periodicity = new int[dimension]; - - // store MPI communicator to local variable - global_comm = comm; - - // get the local coordinates, periodicity and global size from the MPI communicator - MPI_Cart_get(global_comm, dimension, global_dims, periodicity, local_coords); + // get the local coordinates, this->periodicity and global size from the MPI communicator + MPI_Cart_get(this->global_comm, this->dimension, this->global_dims.data(), this->periodicity.data(), this->local_coords.data()); // get the local rank from the MPI communicator - MPI_Cart_rank(global_comm, local_coords, &local_rank); + MPI_Cart_rank(this->global_comm, this->local_coords.data(), &this->local_rank); // create sub-communicators @@ -249,16 +142,16 @@ template <class T, class W> void ALL_Histogram_LB<T,W>::setup(MPI_Comm comm) // reduction sub-communicators (equal to staggered grid) // z-plane - MPI_Comm_split(global_comm, - local_coords[2], - local_coords[0]+local_coords[1]*global_dims[0], - &communicators[2]); + MPI_Comm_split(this->global_comm, + this->local_coords.at(2), + this->local_coords.at(0)+this->local_coords.at(1)*this->global_dims.at(0), + &communicators.at(2)); // y-column - MPI_Comm_split(global_comm, - local_coords[2] * global_dims[1] +local_coords[1], - local_coords[0], - &communicators[1]); + MPI_Comm_split(this->global_comm, + this->local_coords.at(2) * this->global_dims.at(1) +this->local_coords.at(1), + this->local_coords.at(0), + &communicators.at(1)); // only cell itself communicators[0] = MPI_COMM_SELF; @@ -266,23 +159,22 @@ template <class T, class W> void ALL_Histogram_LB<T,W>::setup(MPI_Comm comm) // gathering sub-communicators // x-gathering (same y/z coordinates) - MPI_Comm_split(global_comm, - local_coords[1] + local_coords[2]*global_dims[1], - local_coords[0], - &communicators[3]); + MPI_Comm_split(this->global_comm, + this->local_coords.at(1) + this->local_coords.at(2)*this->global_dims.at(1), + this->local_coords.at(0), + &communicators.at(3)); // y-gathering - MPI_Comm_split(global_comm, - local_coords[0] + local_coords[2]*global_dims[0], - local_coords[1], - &communicators[4]); + MPI_Comm_split(this->global_comm, + this->local_coords.at(0) + this->local_coords.at(2)*this->global_dims.at(0), + this->local_coords.at(1), + &communicators.at(4)); // z-gathering - MPI_Comm_split(global_comm, - local_coords[0] + local_coords[1]*global_dims[0], - local_coords[2], - &communicators[5]); - + MPI_Comm_split(this->global_comm, + this->local_coords.at(0) + this->local_coords.at(1)*this->global_dims.at(0), + this->local_coords.at(2), + &communicators.at(5)); // determine correct MPI data type for template T if (std::is_same<T,double>::value) mpi_data_type_T = MPI_DOUBLE; @@ -316,9 +208,9 @@ template <class T, class W> void ALL_Histogram_LB<T,W>::setup(MPI_Comm comm) int rank_left, rank_right; neighbors.clear(); - for (int i = 0; i < dimension; ++i) + for (int i = 0; i < this->dimension; ++i) { - MPI_Cart_shift(global_comm,i,1,&rank_left,&rank_right); + MPI_Cart_shift(this->global_comm,i,1,&rank_left,&rank_right); neighbors.push_back(rank_left); neighbors.push_back(rank_right); } @@ -340,42 +232,44 @@ template<class T, class W> void ALL_Histogram_LB<T,W>::balance(int step) W work_deviation; // vector to store all values of the histogram for the current dimension - std::vector<W> work_dimension(n_bins->at(i)); + std::vector<W> work_dimension(n_bins.at(i)); // compute total number of bins in dimension int n_bins_dimension; - MPI_Allreduce(&(n_bins->at(i)), + MPI_Allreduce(&(n_bins.at(i)), 
&n_bins_dimension, 1, MPI_INT, MPI_SUM, - communicators[i+3]); + communicators.at(i+3)); std::vector<W> work_collection(n_bins_dimension); - std::vector<int> histogram_slices(global_dims[i]); - std::vector<W> work_new_dimension(global_dims[i], 0.0); + std::vector<int> histogram_slices(this->global_dims.at(i)); + std::vector<W> work_new_dimension(this->global_dims.at(i), 0.0); // collect how many bins from each process will be received - MPI_Allgather(&(n_bins->at(i)), + MPI_Allgather(&(n_bins.at(i)), 1, MPI_INT, histogram_slices.data(), 1, MPI_INT, - communicators[i+3]); - + communicators.at(i+3)); // add up work to get total work on domain - for ( int n = 0; n < n_bins->at(i); ++n ) - work_sum_local += work.at(n); - + for ( int n = 0; n < n_bins.at(i); ++n ) + { +// std::cout << "DEBUG: " << this->work.size() << " " << n << " " << i << " on " << this->local_rank << std::endl; + work_sum_local += this->work.at(n); + } + // compute total work in current dimension MPI_Allreduce(&work_sum_local, &work_sum_dimension, 1, mpi_data_type_W, MPI_SUM, - communicators[i]); + communicators.at(i)); // compute total work for current dimension MPI_Allreduce(&work_sum_dimension, @@ -383,31 +277,31 @@ template<class T, class W> void ALL_Histogram_LB<T,W>::balance(int step) 1, mpi_data_type_W, MPI_SUM, - communicators[i+3]); - - int avg_num = global_dims[i]; + communicators.at(i+3)); + + int avg_num = this->global_dims.at(i); work_avg_dimension = work_tot_dimension / (W)avg_num; // compute local slice of the histogram - MPI_Allreduce(work.data(), + MPI_Allreduce(this->work.data(), work_dimension.data(), - n_bins->at(i), + n_bins.at(i), mpi_data_type_W, MPI_SUM, - communicators[i]); + communicators.at(i)); // displacement array - std::vector<int> displs(global_dims[i],0); + std::vector<int> displs(this->global_dims.at(i),0); int tmp = 0; - for (int n = 0; n < global_dims[i]; ++n) + + for (int n = 0; n < this->global_dims.at(i); ++n) { displs[n] = tmp; tmp += histogram_slices.at(n); } - // gather complete histogram in current dimension MPI_Allgatherv(work_dimension.data(), - n_bins->at(i), + n_bins.at(i), mpi_data_type_W, work_collection.data(), histogram_slices.data(), @@ -417,26 +311,39 @@ template<class T, class W> void ALL_Histogram_LB<T,W>::balance(int step) int current_slice = 0; + double work_sum = 0.0; + // TODO: compute cumulative function - up to work_avg_dimension work in each box for (int idx = 0; idx < work_collection.size(); ++idx) { - work_new_dimension.at(current_slice) += work_collection.at(idx); + //work_new_dimension.at(current_slice) += work_collection.at(idx); + work_sum += work_collection.at(idx); /* - if (i == 1 && local_rank == 0) + if (i == 1 && this->local_rank == 0) std::cout << "DEBUG: " << current_slice << " " << idx << " " << work_new_dimension.at(current_slice) << " " << work_collection.at(idx+1) << " " << work_collection.size() << " " << work_avg_dimension << " " << work_tot_dimension << " " << work_sum_dimension << std::endl; */ if (idx < work_collection.size() - 1 && - work_new_dimension.at(current_slice) + work_collection.at(idx+1) - > work_avg_dimension + //work_new_dimension.at(current_slice) + work_collection.at(idx+1) + // > work_avg_dimension + work_sum + work_collection.at(idx+1) + > (current_slice+1) * work_avg_dimension ) { - histogram_slices.at(current_slice) = idx + 1; - if (current_slice == (global_dims[i] - 1)) + /* + double d1 = work_avg_dimension - work_new_dimension.at(current_slice); + double d2 = work_new_dimension.at(current_slice) + 
work_collection.at(idx+1) - + work_avg_dimension; + if (d1 >= d2) + histogram_slices.at(current_slice) = idx + 1; + else + */ + histogram_slices.at(current_slice) = idx; + if (current_slice == (this->global_dims.at(i) - 1)) { histogram_slices.at(current_slice) = work_collection.size(); W tmp_work = (W)0; - for (int j = 0; j < global_dims[i] - 1; ++j) + for (int j = 0; j < this->global_dims.at(i) - 1; ++j) tmp_work += work_new_dimension.at(j); work_new_dimension.at(current_slice) = work_tot_dimension - tmp_work; break; @@ -446,34 +353,32 @@ template<class T, class W> void ALL_Histogram_LB<T,W>::balance(int step) } // TODO: compute width of domain - T up = (local_coords[i] == global_dims[i]-1)?(T)work_collection.size():(T)histogram_slices.at(local_coords[i]); - T down = (local_coords[i] == 0)?(T)0:histogram_slices.at(local_coords[i]-1); + T up = (this->local_coords.at(i) == this->global_dims.at(i)-1)?(T)work_collection.size():(T)histogram_slices.at(this->local_coords.at(i)); + T down = (this->local_coords.at(i) == 0)?(T)0:histogram_slices.at(this->local_coords.at(i)-1); // size of one slice - T size = sys_size[2*i+1] / (T)work_collection.size(); + T size = this->sys_size[2*i+1] / (T)work_collection.size(); - // TODO: change vertices - for (int d = 0; d < 6; ++d) - shifted_vertices->at(d) = vertices->at(d); + this->shifted_vertices = this->vertices; - shifted_vertices->at(i) = (T)down * size; - shifted_vertices->at(3+i) = (T)up * size; + this->shifted_vertices.at(0).set_coordinate(i, (T)down * size); + this->shifted_vertices.at(1).set_coordinate(i, (T)up * size); // compute min / max work in current dimension - MPI_Allreduce(work_new_dimension.data()+local_coords[i], + MPI_Allreduce(work_new_dimension.data()+this->local_coords.at(i), &work_min_dimension, 1, mpi_data_type_W, MPI_MIN, - global_comm); - MPI_Allreduce(work_new_dimension.data()+local_coords[i], + this->global_comm); + MPI_Allreduce(work_new_dimension.data()+this->local_coords.at(i), &work_max_dimension, 1, mpi_data_type_W, MPI_MAX, - global_comm); + this->global_comm); - if (local_rank == 0) std::cout << "HISTOGRAM: " + if (this->local_rank == 0) std::cout << "HISTOGRAM: " << i << " " << work_min_dimension << " " << work_max_dimension << " " @@ -482,6 +387,15 @@ template<class T, class W> void ALL_Histogram_LB<T,W>::balance(int step) << std::endl; find_neighbors(); + + MPI_Barrier(this->global_comm); + // free created communicators + for (int i = 0; i < 6; ++i) + { + if (communicators.at(i) != MPI_COMM_SELF && communicators.at(i) != MPI_COMM_WORLD) + MPI_Comm_free(&(communicators.at(i))); + } + } template<class T, class W> void ALL_Histogram_LB<T,W>::find_neighbors() @@ -492,7 +406,7 @@ template<class T, class W> void ALL_Histogram_LB<T,W>::find_neighbors() MPI_Status sstat, rstat; // array to store neighbor vertices in Y/Z direction (reused) T* vertices_loc = new T[4]; - T* vertices_rem = new T[8*global_dims[0]*global_dims[1]]; + T* vertices_rem = new T[8*this->global_dims.at(0)*this->global_dims.at(1)]; int rem_rank; int rem_coords[3]; @@ -505,61 +419,64 @@ template<class T, class W> void ALL_Histogram_LB<T,W>::find_neighbors() int offset_coords[3]; // X-neighbors are static - n_neighbors->at(0) = n_neighbors->at(1) = 1; + n_neighbors.at(0) = n_neighbors.at(1) = 1; // find X-neighbors - MPI_Cart_shift(global_comm,0,1,&rank_left,&rank_right); + MPI_Cart_shift(this->global_comm,0,1,&rank_left,&rank_right); // store X-neighbors neighbors.push_back(rank_left); neighbors.push_back(rank_right); // find Y-neighbors to get border 
information from - MPI_Cart_shift(global_comm,1,1,&rank_left,&rank_right); + MPI_Cart_shift(this->global_comm,1,1,&rank_left,&rank_right); // collect border information from local column - vertices_loc[0] = shifted_vertices->at(0); - vertices_loc[1] = shifted_vertices->at(dimension); + vertices_loc[0] = this->shifted_vertices.at(0).x(0); + vertices_loc[1] = this->shifted_vertices.at(1).x(0); MPI_Allgather(vertices_loc, 2, mpi_data_type_T, - vertices_rem+2*global_dims[0], + vertices_rem+2*this->global_dims.at(0), 2, mpi_data_type_T, - communicators[1]); + communicators.at(1)); // exchange local column information with upper neighbor in Y direction (cart grid) MPI_Irecv(vertices_rem, - 2*global_dims[0], + 2*this->global_dims.at(0), mpi_data_type_T, rank_left, 0, - global_comm, + this->global_comm, &rreq); - MPI_Isend(vertices_rem+2*global_dims[0], - 2*global_dims[0], + MPI_Isend(vertices_rem+2*this->global_dims.at(0), + 2*this->global_dims.at(0), mpi_data_type_T, rank_right, 0, - global_comm,&sreq); + this->global_comm,&sreq); // determine the offset in ranks offset_coords[0] = 0; - offset_coords[1] = local_coords[1] - 1; - offset_coords[2] = local_coords[2]; + offset_coords[1] = this->local_coords.at(1) - 1; + offset_coords[2] = this->local_coords.at(2); rem_coords[1] = offset_coords[1]; rem_coords[2] = offset_coords[2]; - MPI_Cart_rank(global_comm,offset_coords,&rank_offset); + MPI_Cart_rank(this->global_comm,offset_coords,&rank_offset); + MPI_Barrier(this->global_comm); + if (this->local_rank == 0) std::cout << "HISTOGRAM: neighbor_find y-communication" + << std::endl; // wait for communication MPI_Wait(&sreq,&sstat); MPI_Wait(&rreq,&rstat); // iterate about neighbor borders to determine the neighborship relation - n_neighbors->at(2) = 0; - for (int x = 0; x < global_dims[0]; ++x) + n_neighbors.at(2) = 0; + for (int x = 0; x < this->global_dims.at(0); ++x) { if ( ( vertices_rem[2*x] <= vertices_loc[0] && vertices_loc[0] < vertices_rem[2*x+1] ) || @@ -568,49 +485,52 @@ template<class T, class W> void ALL_Histogram_LB<T,W>::find_neighbors() vertices_loc[1] >= vertices_rem[2*x+1] ) ) { - n_neighbors->at(2)++; + n_neighbors.at(2)++; rem_coords[0] = x; - MPI_Cart_rank(global_comm,rem_coords,&rem_rank); + MPI_Cart_rank(this->global_comm,rem_coords,&rem_rank); neighbors.push_back(rem_rank); } } // barrier to ensure every process concluded the calculations before overwriting remote borders! 
- MPI_Barrier(global_comm); + MPI_Barrier(this->global_comm); // exchange local column information with lower neighbor in Y direction (cart grid) MPI_Irecv(vertices_rem, - 2*global_dims[0], + 2*this->global_dims.at(0), mpi_data_type_T, rank_right, 0, - global_comm, + this->global_comm, &rreq); - MPI_Isend(vertices_rem+2*global_dims[0], - 2*global_dims[0], + MPI_Isend(vertices_rem+2*this->global_dims.at(0), + 2*this->global_dims.at(0), mpi_data_type_T, rank_left, 0, - global_comm, + this->global_comm, &sreq); // determine the offset in ranks offset_coords[0] = 0; - offset_coords[1] = local_coords[1] + 1; - offset_coords[2] = local_coords[2]; + offset_coords[1] = this->local_coords.at(1) + 1; + offset_coords[2] = this->local_coords.at(2); rem_coords[1] = offset_coords[1]; rem_coords[2] = offset_coords[2]; - MPI_Cart_rank(global_comm,offset_coords,&rank_offset); + MPI_Cart_rank(this->global_comm,offset_coords,&rank_offset); + MPI_Barrier(this->global_comm); + if (this->local_rank == 0) std::cout << "HISTOGRAM: neighbor_find y-communication 2" + << std::endl; // wait for communication MPI_Wait(&sreq,&sstat); MPI_Wait(&rreq,&rstat); // iterate about neighbor borders to determine the neighborship relation - n_neighbors->at(3) = 0; - for (int x = 0; x < global_dims[0]; ++x) + n_neighbors.at(3) = 0; + for (int x = 0; x < this->global_dims.at(0); ++x) { if ( ( vertices_rem[2*x] <= vertices_loc[0] && vertices_loc[0] < vertices_rem[2*x+1] ) || @@ -619,203 +539,212 @@ template<class T, class W> void ALL_Histogram_LB<T,W>::find_neighbors() vertices_loc[1] >= vertices_rem[2*x+1] ) ) { - n_neighbors->at(3)++; + n_neighbors.at(3)++; rem_coords[0] = x; - MPI_Cart_rank(global_comm,rem_coords,&rem_rank); + MPI_Cart_rank(this->global_comm,rem_coords,&rem_rank); neighbors.push_back(rem_rank); } } // barrier to ensure every process concluded the calculations before overwriting remote borders! 
- MPI_Barrier(global_comm); + MPI_Barrier(this->global_comm); // find Z-neighbors to get border information from - MPI_Cart_shift(global_comm,2,1,&rank_left,&rank_right); + MPI_Cart_shift(this->global_comm,2,1,&rank_left,&rank_right); // collect border information from local column - vertices_loc[0] = shifted_vertices->at(0); - vertices_loc[1] = shifted_vertices->at(dimension); - vertices_loc[2] = shifted_vertices->at(1); - vertices_loc[3] = shifted_vertices->at(1+dimension); + vertices_loc[0] = this->shifted_vertices.at(0).x(0); + vertices_loc[1] = this->shifted_vertices.at(1).x(0); + vertices_loc[2] = this->shifted_vertices.at(0).x(1); + vertices_loc[3] = this->shifted_vertices.at(1).x(1); - MPI_Barrier(global_comm); + MPI_Barrier(this->global_comm); MPI_Allgather(vertices_loc, 4, mpi_data_type_T, - vertices_rem+4*global_dims[0]*global_dims[1], + vertices_rem+4*this->global_dims.at(0)*this->global_dims.at(1), 4, mpi_data_type_T, - communicators[2]); + communicators.at(2)); // exchange local column information with upper neighbor in Z direction (cart grid) MPI_Irecv(vertices_rem, - 4*global_dims[0]*global_dims[1], + 4*this->global_dims.at(0)*this->global_dims.at(1), mpi_data_type_T, rank_left, 0, - global_comm, + this->global_comm, &rreq); - MPI_Isend(vertices_rem+4*global_dims[0]*global_dims[1], - 4*global_dims[0]*global_dims[1], + MPI_Isend(vertices_rem+4*this->global_dims.at(0)*this->global_dims.at(1), + 4*this->global_dims.at(0)*this->global_dims.at(1), mpi_data_type_T, rank_right, 0, - global_comm, + this->global_comm, &sreq); // determine the offset in ranks offset_coords[0] = 0; offset_coords[1] = 0; - offset_coords[2] = local_coords[2]-1; + offset_coords[2] = this->local_coords.at(2)-1; rem_coords[2] = offset_coords[2]; - MPI_Cart_rank(global_comm,offset_coords,&rank_offset); + MPI_Cart_rank(this->global_comm,offset_coords,&rank_offset); + MPI_Barrier(this->global_comm); + if (this->local_rank == 0) std::cout << "HISTOGRAM: neighbor_find z-communication" + << std::endl; // wait for communication MPI_Wait(&sreq,&sstat); MPI_Wait(&rreq,&rstat); // iterate about neighbor borders to determine the neighborship relation - n_neighbors->at(4) = 0; - for (int y = 0; y < global_dims[1]; ++y) + n_neighbors.at(4) = 0; + for (int y = 0; y < this->global_dims.at(1); ++y) { - for (int x = 0; x < global_dims[0]; ++x) + for (int x = 0; x < this->global_dims.at(0); ++x) { if ( ( ( - vertices_rem[4*(x+y*global_dims[0])+2] <= vertices_loc[2] && - vertices_loc[2] < vertices_rem[4*(x+y*global_dims[0])+3] + vertices_rem[4*(x+y*this->global_dims.at(0))+2] <= vertices_loc[2] && + vertices_loc[2] < vertices_rem[4*(x+y*this->global_dims.at(0))+3] ) || ( - vertices_rem[4*(x+y*global_dims[0])+2] < vertices_loc[3] && - vertices_loc[3] <= vertices_rem[4*(x+y*global_dims[0])+3] + vertices_rem[4*(x+y*this->global_dims.at(0))+2] < vertices_loc[3] && + vertices_loc[3] <= vertices_rem[4*(x+y*this->global_dims.at(0))+3] ) || ( - vertices_rem[4*(x+y*global_dims[0])+2] >= vertices_loc[2] && - vertices_loc[2] < vertices_rem[4*(x+y*global_dims[0])+3] && - vertices_loc[3] >= vertices_rem[4*(x+y*global_dims[0])+3] + vertices_rem[4*(x+y*this->global_dims.at(0))+2] >= vertices_loc[2] && + vertices_loc[2] < vertices_rem[4*(x+y*this->global_dims.at(0))+3] && + vertices_loc[3] >= vertices_rem[4*(x+y*this->global_dims.at(0))+3] ) ) ) if ( ( ( - vertices_rem[4*(x+y*global_dims[0])] <= vertices_loc[0] && - vertices_loc[0] < vertices_rem[4*(x+y*global_dims[0])+1] + vertices_rem[4*(x+y*this->global_dims.at(0))] <= vertices_loc[0] && 
+ vertices_loc[0] < vertices_rem[4*(x+y*this->global_dims.at(0))+1] ) || ( - vertices_rem[4*(x+y*global_dims[0])] < vertices_loc[1] && - vertices_loc[1] <= vertices_rem[4*(x+y*global_dims[0])+1] + vertices_rem[4*(x+y*this->global_dims.at(0))] < vertices_loc[1] && + vertices_loc[1] <= vertices_rem[4*(x+y*this->global_dims.at(0))+1] ) || ( - vertices_rem[4*(x+y*global_dims[0])] >= vertices_loc[0] && - vertices_loc[0] < vertices_rem[4*(x+y*global_dims[0])+1] && - vertices_loc[1] >= vertices_rem[4*(x+y*global_dims[0])+1] + vertices_rem[4*(x+y*this->global_dims.at(0))] >= vertices_loc[0] && + vertices_loc[0] < vertices_rem[4*(x+y*this->global_dims.at(0))+1] && + vertices_loc[1] >= vertices_rem[4*(x+y*this->global_dims.at(0))+1] ) ) ) { - n_neighbors->at(4)++; + n_neighbors.at(4)++; rem_coords[1] = y; rem_coords[0] = x; - MPI_Cart_rank(global_comm,rem_coords,&rem_rank); + MPI_Cart_rank(this->global_comm,rem_coords,&rem_rank); neighbors.push_back(rem_rank); } } } // barrier to ensure every process concluded the calculations before overwriting remote borders! - MPI_Barrier(global_comm); + MPI_Barrier(this->global_comm); // exchange local column information with upper neighbor in Y direction (cart grid) MPI_Irecv(vertices_rem, - 4*global_dims[0]*global_dims[1], + 4*this->global_dims.at(0)*this->global_dims.at(1), mpi_data_type_T, rank_right, 0, - global_comm, + this->global_comm, &rreq); - MPI_Isend(vertices_rem+4*global_dims[0]*global_dims[1], - 4*global_dims[0]*global_dims[1], + MPI_Isend(vertices_rem+4*this->global_dims.at(0)*this->global_dims.at(1), + 4*this->global_dims.at(0)*this->global_dims.at(1), mpi_data_type_T, rank_left, 0, - global_comm, + this->global_comm, &sreq); // determine the offset in ranks offset_coords[0] = 0; offset_coords[1] = 0; - offset_coords[2] = local_coords[2]+1; + offset_coords[2] = this->local_coords.at(2)+1; rem_coords[2] = offset_coords[2]; - MPI_Cart_rank(global_comm,offset_coords,&rank_offset); + MPI_Cart_rank(this->global_comm,offset_coords,&rank_offset); + + MPI_Barrier(this->global_comm); + if (this->local_rank == 0) std::cout << "HISTOGRAM: neighbor_find z-communication 2" + << std::endl; // wait for communication MPI_Wait(&sreq,&sstat); MPI_Wait(&rreq,&rstat); + if (this->local_rank == 0) std::cout << "HISTOGRAM: neighbor_find z-communication 2" + << std::endl; // iterate about neighbor borders to determine the neighborship relation - n_neighbors->at(5) = 0; - for (int y = 0; y < global_dims[1]; ++y) + n_neighbors.at(5) = 0; + for (int y = 0; y < this->global_dims.at(1); ++y) { - for (int x = 0; x < global_dims[0]; ++x) + for (int x = 0; x < this->global_dims.at(0); ++x) { if ( ( ( - vertices_rem[4*(x+y*global_dims[0])+2] <= vertices_loc[2] && - vertices_loc[2] < vertices_rem[4*(x+y*global_dims[0])+3] + vertices_rem[4*(x+y*this->global_dims.at(0))+2] <= vertices_loc[2] && + vertices_loc[2] < vertices_rem[4*(x+y*this->global_dims.at(0))+3] ) || ( - vertices_rem[4*(x+y*global_dims[0])+2] < vertices_loc[3] && - vertices_loc[3] <= vertices_rem[4*(x+y*global_dims[0])+3] + vertices_rem[4*(x+y*this->global_dims.at(0))+2] < vertices_loc[3] && + vertices_loc[3] <= vertices_rem[4*(x+y*this->global_dims.at(0))+3] ) || ( - vertices_rem[4*(x+y*global_dims[0])+2] >= vertices_loc[2] && - vertices_loc[2] < vertices_rem[4*(x+y*global_dims[0])+3] && - vertices_loc[3] >= vertices_rem[4*(x+y*global_dims[0])+3] + vertices_rem[4*(x+y*this->global_dims.at(0))+2] >= vertices_loc[2] && + vertices_loc[2] < vertices_rem[4*(x+y*this->global_dims.at(0))+3] && + vertices_loc[3] >= 
vertices_rem[4*(x+y*this->global_dims.at(0))+3] ) ) ) if ( ( ( - vertices_rem[4*(x+y*global_dims[0])] <= vertices_loc[0] && - vertices_loc[0] < vertices_rem[4*(x+y*global_dims[0])+1] + vertices_rem[4*(x+y*this->global_dims.at(0))] <= vertices_loc[0] && + vertices_loc[0] < vertices_rem[4*(x+y*this->global_dims.at(0))+1] ) || ( - vertices_rem[4*(x+y*global_dims[0])] < vertices_loc[1] && - vertices_loc[1] <= vertices_rem[4*(x+y*global_dims[0])+1] + vertices_rem[4*(x+y*this->global_dims.at(0))] < vertices_loc[1] && + vertices_loc[1] <= vertices_rem[4*(x+y*this->global_dims.at(0))+1] ) || ( - vertices_rem[4*(x+y*global_dims[0])] >= vertices_loc[0] && - vertices_loc[0] < vertices_rem[4*(x+y*global_dims[0])+1] && - vertices_loc[1] >= vertices_rem[4*(x+y*global_dims[0])+1] + vertices_rem[4*(x+y*this->global_dims.at(0))] >= vertices_loc[0] && + vertices_loc[0] < vertices_rem[4*(x+y*this->global_dims.at(0))+1] && + vertices_loc[1] >= vertices_rem[4*(x+y*this->global_dims.at(0))+1] ) ) ) { - n_neighbors->at(5)++; + n_neighbors.at(5)++; rem_coords[1] = y; rem_coords[0] = x; - MPI_Cart_rank(global_comm,rem_coords,&rem_rank); + MPI_Cart_rank(this->global_comm,rem_coords,&rem_rank); neighbors.push_back(rem_rank); } } } // barrier to ensure every process concluded the calculations before overwriting remote borders! - MPI_Barrier(global_comm); + MPI_Barrier(this->global_comm); // clear up vertices array - delete vertices_loc; - delete vertices_rem; + delete[] vertices_loc; + delete[] vertices_rem; } // provide list of neighbors @@ -826,7 +755,7 @@ template<class T, class W> void ALL_Histogram_LB<T,W>::get_neighbors(std::vector // provide list of neighbors in each direction template<class T, class W> void ALL_Histogram_LB<T,W>::get_neighbors(int** ret) { - *ret = n_neighbors->data(); + *ret = n_neighbors.data(); } #endif diff --git a/include/ALL_LB.hpp b/include/ALL_LB.hpp new file mode 100644 index 0000000000000000000000000000000000000000..1db74299dfe011c83bfca308962518571062eaaf --- /dev/null +++ b/include/ALL_LB.hpp @@ -0,0 +1,156 @@ +/* +Copyright 2018 Rene Halver, Forschungszentrum Juelich GmbH, Germany +Copyright 2018 Godehard Sutmann, Forschungszentrum Juelich GmbH, Germany + +Redistribution and use in source and binary forms, with or without modification, +are permitted provided that the following conditions are met: + +1. Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright notice, this + list of conditions and the following disclaimer in the documentation and/or + other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its contributors may + be used to endorse or promote products derived from this software without + specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
+IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, +INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, +BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, +OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, +WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. +*/ + + +#ifndef ALL_LB_HEADER_INCLUDED +#define ALL_LB_HEADER_INCLUDED + +#include <vector> +#include "ALL_Point.hpp" +#include "mpi.h" + +// T: data type of vertices used in the balancer +// W: data type of the work used in the balancer +template <class T, class W> class ALL_LB { + public: + ALL_LB(int dim, T g) : + gamma(g), + dimension(dim) + { + // set allowed minimum size to zero + min_size = std::vector<T>(dim,0.0); + periodicity.resize(dim); + local_coords.resize(dim); + global_dims.resize(dim); + } + virtual ~ALL_LB() = default; + virtual void setup() = 0; + virtual void balance(int step) = 0; + // neighbors + // provide list of neighbors + virtual void get_neighbors(std::vector<int>&) = 0; + // provide list of neighbors in each direction + virtual void get_neighbors(int**) = 0; + // setting / updating vertices + virtual void set_vertices(std::vector<ALL_Point<T>>&); + + // global getter / setter functions + virtual void set_dimension(const int d) {dimension = d;} + virtual int get_dimension() {return dimension;} + + virtual void set_gamma(const T g) {gamma = g;} + virtual T get_gamma() {return gamma;} + + virtual void set_work(const std::vector<W>& w) {work = w;} + virtual void set_work(const W w) {work.resize(1); work.at(0) = w;} + + virtual void set_min_domain_size(const std::vector<T>& _min_size) + {min_size = _min_size;} + virtual const std::vector<T>& get_min_domain_size() + {return min_size;} + + virtual void set_sys_size(const std::vector<T>& _sys_size) + {sys_size = _sys_size;} + virtual const std::vector<T>& get_sys_size() + {return sys_size;} + + virtual std::vector<W>& get_work() {return work;} + + //virtual void set_vertices(std::vector<T>&); + virtual std::vector<ALL_Point<T>>& get_vertices(); + virtual std::vector<ALL_Point<T>>& get_shifted_vertices(); + + virtual void set_communicator(MPI_Comm comm) + { + // store communicator + global_comm = comm; + // get local rank within communicator + MPI_Comm_rank(comm,&local_rank); + } + + void resize_shifted_vertices(int); + + int get_n_vertices() {return vertices.size();} + + virtual void set_data(void*) = 0; + +protected: + // dimension of the system + int dimension; + // correction factor / histogram bin size + T gamma; + // used communicator + MPI_Comm global_comm; + // local rank + int local_rank; + // if function supports a minimum domain size, use this + std::vector<T> min_size; + // system size + std::vector<T> sys_size; + // work array + std::vector<W> work; + // vertices + std::vector<ALL_Point<T>> vertices; + // shifted vertices + std::vector<ALL_Point<T>> shifted_vertices; + // global dimension of the system in each coordinate + std::vector<int> global_dims; + // coordinates of the local domain in the system + std::vector<int> local_coords; + // periodicity of the MPI communicator / system + std::vector<int> periodicity; +}; + +// set the actual vertices (unsafe due to array copy) +template <class T, class W> void ALL_LB<T,W>::set_vertices(std::vector<ALL_Point<T>>& vertices_in) +{ + vertices = 
vertices_in; + // as a basic assumption the number of shifted vertices + // is equal to the number of input vertices: + // exceptions: + // VORONOI + shifted_vertices = vertices; +} + +template <class T, class W> std::vector<ALL_Point<T>>& ALL_LB<T,W>::get_vertices() +{ + return vertices; +} + +template <class T, class W> std::vector<ALL_Point<T>>& ALL_LB<T,W>::get_shifted_vertices() +{ + return shifted_vertices; +} + +template <class T, class W> void ALL_LB<T,W>::resize_shifted_vertices(int new_size) +{ + shifted_vertices.resize(new_size); +} +#endif // ALL_LB_HEADER_INCLUDED diff --git a/include/ALL_Point.hpp b/include/ALL_Point.hpp index 8f77e71a08fa6152e8b82e6b03056a38f8b3d6e9..bc37b5cfb3e0932a6009a588c7c571f1b51d3734 100644 --- a/include/ALL_Point.hpp +++ b/include/ALL_Point.hpp @@ -57,10 +57,16 @@ template <class T> class ALL_Point ALL_Point(const std::vector<T>&); // constructor for ALL_Point with given coordinates with weight (not secure) ALL_Point(const int,const T*, const T); + // constructor for ALL_Point with given coordinates with weight (not secure) + ALL_Point(const int,const T*, const T, const long); // constructor for ALL_Point with given coordinates with weight (std::list) ALL_Point(const int,const std::list<T>&, const T); + // constructor for ALL_Point with given coordinates with weight (std::list) + ALL_Point(const int,const std::list<T>&, const T, const long); // constructor for ALL_Point with given coordinates with weight (std::vector) ALL_Point(const std::vector<T>&, const T); + // constructor for ALL_Point with given coordinates with weight (std::vector) + ALL_Point(const std::vector<T>&, const T, const long); // destructor ~ALL_Point<T>() {}; @@ -68,6 +74,8 @@ template <class T> class ALL_Point void set_dimension(const int d) {dimension = d; coordinates.resize(d);} // set weight of point void set_weight(const T w) {weight = w;} + // set ID with a long + void set_id (const long i) {id = i;} // set coordinates with an array (not secure) void set_coordinates(const T*); // set coordinates with a std::list @@ -79,6 +87,8 @@ template <class T> class ALL_Point T get_coordinate(const int) const; // get weight T get_weight() const {return weight;} + // get ID + long get_id() const {return id;} // set a single coordinate void set_coordinate(const int,const T&); @@ -101,6 +111,8 @@ template <class T> class ALL_Point // dimension of the ALL_Point int dimension; + // id + long id; // array containg the coordinates std::vector<T> coordinates; // weight (e.g. 
number of interactions, sites in case of block-based @@ -136,18 +148,36 @@ template <class T> ALL_Point<T>::ALL_Point(const int d, const T* data, const T w weight = w; } +template <class T> ALL_Point<T>::ALL_Point(const int d, const T* data, const T w, const long i) + : ALL_Point<T>(d,data,w) +{ + id = i; +} + template <class T> ALL_Point<T>::ALL_Point(const int d, const std::list<T>& data, const T w) : ALL_Point<T>(d,data) { weight = w; } +template <class T> ALL_Point<T>::ALL_Point(const int d, const std::list<T>& data, const T w, const long i) + : ALL_Point<T>(d,data,w) +{ + id = i; +} + template <class T> ALL_Point<T>::ALL_Point(const std::vector<T>& data, const T w) : ALL_Point<T>(d,data) { weight = w; } +template <class T> ALL_Point<T>::ALL_Point(const std::vector<T>& data, const T w, const long i) + : ALL_Point<T>(data,w) +{ + id = i; +} + // unsafe version: no bound checking for data possible // data needs to be at least of length dimension template <class T> void ALL_Point<T>::set_coordinates(const T* data) diff --git a/include/ALL_Staggered.hpp b/include/ALL_Staggered.hpp index 7f66ef852713d5ab49ff4082f746d81912865ed7..5b159842bf2f5eadcc9853adb9a399656fd8ec0a 100644 --- a/include/ALL_Staggered.hpp +++ b/include/ALL_Staggered.hpp @@ -35,181 +35,87 @@ POSSIBILITY OF SUCH DAMAGE. #include <exception> #include <vector> #include "ALL_CustomExceptions.hpp" +#include "ALL_LB.hpp" // T: data type of vertices used in the balancer // W: data type of the work used in the balancer -template <class T, class W> class ALL_Staggered_LB +template <class T, class W> class ALL_Staggered_LB : public ALL_LB<T,W> { public: ALL_Staggered_LB() {} - ALL_Staggered_LB(int d, W w, T g) : dimension(d), - work(w), - gamma(g) + ALL_Staggered_LB(int d, W w, T g) : + ALL_LB<T,W>(d,g) { + this->set_work(w); // need the lower and upper bounds // of the domain for the tensor-based // load-balancing scheme - vertices = std::vector<T>(6); - shifted_vertices = std::vector<T>(6); - // global dimension of the system in each coordinate - global_dims = new int[3]; - // coordinates of the local domain in the system - local_coords = new int[3]; - // periodicity of the MPI communicator / system - periodicity = new int[3]; - + // array of MPI communicators for each direction (to collect work // on each plane) - communicators = new MPI_Comm[3]; - n_neighbors = new std::vector<int>(6); - - // allocate min size array - min_size = new T[d]; + communicators.resize(d); + n_neighbors.resize(2*d); } - ~ALL_Staggered_LB(); - - void set_vertices(T*); + ~ALL_Staggered_LB() = default; // setup communicators for the exchange of work in each domain - void setup(MPI_Comm); + void setup() override; // do a load-balancing step - virtual void balance(); + void balance(int step) override; // getter for variables (passed by reference to avoid // direct access to private members of the object) - // getter for base vertices - void get_vertices(T*); - // getter for shifted vertices - void get_shifted_vertices(T*); - // getter for shifted vertices - void get_shifted_vertices(std::vector<T>&); - - // set minimum domain size - void set_min_domain_size(T*); - // neighbors // provide list of neighbors - virtual void get_neighbors(std::vector<int>&); + virtual void get_neighbors(std::vector<int>&) override; // provide list of neighbors in each direction - virtual void get_neighbors(int**); - + virtual void get_neighbors(int**) override; + // empty override for setting method specific data + virtual void set_data(void*) override {} private: - //
dimension of the system - int dimension; - - // shift cooeficient - T gamma; - - // work per domain - W work; - - // vertices - std::vector<T> vertices; - - // vertices after load-balancing shift - std::vector<T> shifted_vertices; - - // MPI values - // global communicator - MPI_Comm global_comm; - // rank of the local domain in the communicator - int local_rank; - // global dimension of the system in each coordinate - int* global_dims; - // coordinates of the local domain in the system - int* local_coords; - // periodicity of the MPI communicator / system - int* periodicity; - - // type for MPI communication + // type for MPI communication MPI_Datatype mpi_data_type_T; MPI_Datatype mpi_data_type_W; // array of MPI communicators for each direction (to collect work // on each plane) - MPI_Comm* communicators; + std::vector<MPI_Comm> communicators; // list of neighbors std::vector<int> neighbors; - std::vector<int>* n_neighbors; + std::vector<int> n_neighbors; // find neighbors (more sophisticated than for tensor-product // variant of LB) void find_neighbors(); - - // minimum domain size - T* min_size; }; +/* template <class T, class W> ALL_Staggered_LB<T,W>::~ALL_Staggered_LB() { - delete[] global_dims; - delete[] local_coords; - delete[] periodicity; - - for (int i = 1; i < 3; ++i) - MPI_Comm_free(communicators + i); - delete[] communicators; - - delete n_neighbors; - delete[] min_size; -} - -template <class T, class W> void ALL_Staggered_LB<T,W>::get_vertices(T* result) -{ - for (int i = 0; i < 2*dimension; ++i) - { - result[i] = vertices.at(i); - } -} - -template <class T, class W> void ALL_Staggered_LB<T,W>::set_min_domain_size(T* min_size) -{ - for (int i = 0; i < dimension; ++i) - this->min_size[i] = min_size[i]; -} - -template <class T, class W> void ALL_Staggered_LB<T,W>::get_shifted_vertices(T* result) -{ - for (int i = 0; i < 2*dimension; ++i) - { - result[i] = shifted_vertices.at(i); - } -} - -template <class T, class W> void ALL_Staggered_LB<T,W>::get_shifted_vertices(std::vector<T>& result) -{ - for (int i = 0; i < 2*dimension; ++i) - { - result.at(i) = shifted_vertices.at(i); - } -} - -// set the actual vertices (unsafe due to array copy) -template <class T, class W> void ALL_Staggered_LB<T,W>::set_vertices(T* v) -{ - for (int i = 0; i < 2*dimension; ++i) - { - vertices.at(i) = v[i]; - } + int dimension = this->get_dimension(); + for (int i = 1; i < dimension; ++i) + MPI_Comm_free(&communicators.at(i)); } +*/ // setup routine for the tensor-based load-balancing scheme // requires: -// global_comm (int): cartesian MPI communicator, from +// this->global_comm (int): cartesian MPI communicator, from // which separate sub communicators // are derived in order to represent // each plane of domains in the system -template <class T, class W> void ALL_Staggered_LB<T,W>::setup(MPI_Comm comm) +template <class T, class W> void ALL_Staggered_LB<T,W>::setup() { int status; + int dimension = this->get_dimension(); // check if Communicator is cartesian - MPI_Topo_test(comm, &status); + MPI_Topo_test(this->global_comm, &status); if (status != MPI_CART) { throw ALL_Invalid_Comm_Type_Exception( @@ -219,27 +125,24 @@ template <class T, class W> void ALL_Staggered_LB<T,W>::setup(MPI_Comm comm) "Cartesian MPI communicator required, passed communicator is not cartesian"); } - // store MPI communicator to local variable - global_comm = comm; - // get the local coordinates, periodicity and global size from the MPI communicator - MPI_Cart_get(global_comm, dimension, global_dims, periodicity, 
local_coords); + MPI_Cart_get(this->global_comm, dimension, this->global_dims.data(), this->periodicity.data(), this->local_coords.data()); // get the local rank from the MPI communicator - MPI_Cart_rank(global_comm, local_coords, &local_rank); + MPI_Cart_rank(this->global_comm, this->local_coords.data(), &this->local_rank); // create sub-communicators // z-plane - MPI_Comm_split(global_comm,local_coords[2],local_coords[0]+local_coords[1]*global_dims[0],&communicators[2]); + MPI_Comm_split(this->global_comm,this->local_coords.at(2),this->local_coords.at(0)+this->local_coords.at(1)*this->global_dims.at(0),&communicators.at(2)); // y-column - MPI_Comm_split(global_comm,local_coords[2] * global_dims[1] - +local_coords[1] ,local_coords[0], - &communicators[1]); + MPI_Comm_split(this->global_comm,this->local_coords.at(2) * this->global_dims.at(1) + +this->local_coords.at(1) ,this->local_coords.at(0), + &communicators.at(1)); // only cell itself - communicators[0] = MPI_COMM_SELF; + communicators.at(0) = MPI_COMM_SELF; // determine correct MPI data type for template T if (std::is_same<T,double>::value) mpi_data_type_T = MPI_DOUBLE; @@ -275,105 +178,131 @@ template <class T, class W> void ALL_Staggered_LB<T,W>::setup(MPI_Comm comm) neighbors.clear(); for (int i = 0; i < dimension; ++i) { - MPI_Cart_shift(global_comm,i,1,&rank_left,&rank_right); + MPI_Cart_shift(this->global_comm,i,1,&rank_left,&rank_right); neighbors.push_back(rank_left); neighbors.push_back(rank_right); } } -template<class T, class W> void ALL_Staggered_LB<T,W>::balance() +template<class T, class W> void ALL_Staggered_LB<T,W>::balance(int) { + int dimension = this->get_dimension(); + // loop over all available dimensions for (int i = 0; i < dimension; ++i) { W work_local_plane; +#ifdef ALL_DEBUG_ENABLED + MPI_Barrier(this->global_comm); + if( this->local_rank == 0 ) std::cout << "ALL_Staggered::balance(): before work computation..." << std::endl; +#endif // collect work from all processes in the same plane - MPI_Allreduce(&work,&work_local_plane,1,mpi_data_type_W,MPI_SUM,communicators[i]); + MPI_Allreduce(this->work.data(),&work_local_plane,1,mpi_data_type_W,MPI_SUM,communicators[i]); +#ifdef ALL_DEBUG_ENABLED + MPI_Barrier(this->global_comm); + if( this->local_rank == 0 ) std::cout << "ALL_Staggered::balance(): before work distribution..." 
<< std::endl; +#endif // correct right border: W remote_work; - T size; + T local_size; T remote_size; // determine neighbors int rank_left, rank_right; - MPI_Cart_shift(global_comm,i,1,&rank_left,&rank_right); + MPI_Cart_shift(this->global_comm,i,1,&rank_left,&rank_right); // collect work from right neighbor plane MPI_Request sreq, rreq; MPI_Status sstat, rstat; - MPI_Irecv(&remote_work,1,mpi_data_type_W,rank_right,0,global_comm,&rreq); - MPI_Isend(&work_local_plane,1,mpi_data_type_W,rank_left,0,global_comm,&sreq); + MPI_Irecv(&remote_work,1,mpi_data_type_W,rank_right,0,this->global_comm,&rreq); + MPI_Isend(&work_local_plane,1,mpi_data_type_W,rank_left,0,this->global_comm,&sreq); MPI_Wait(&sreq,&sstat); MPI_Wait(&rreq,&rstat); // collect size in dimension from right neighbor plane - size = vertices.at(dimension+i) - vertices.at(i); + local_size = this->vertices.at(1).x(i) - this->vertices.at(0).x(i); - MPI_Irecv(&remote_size,1,mpi_data_type_T,rank_right,0,global_comm,&rreq); - MPI_Isend(&size,1,mpi_data_type_T,rank_left,0,global_comm,&sreq); + MPI_Irecv(&remote_size,1,mpi_data_type_T,rank_right,0,this->global_comm,&rreq); + MPI_Isend(&local_size,1,mpi_data_type_T,rank_left,0,this->global_comm,&sreq); MPI_Wait(&sreq,&sstat); MPI_Wait(&rreq,&rstat); // automatic selection of gamma to guarantee stability of method (choice of user gamma ignored) - gamma = std::max(4.1, 1.1 * (1.0 + std::max(size,remote_size) / std::min(size,remote_size))); + this->gamma = std::max(4.1, 1.1 * (1.0 + std::max(local_size,remote_size) / std::min(local_size,remote_size))); +#ifdef ALL_DEBUG_ENABLED + MPI_Barrier(this->global_comm); + if( this->local_rank == 0 ) std::cout << "ALL_Staggered::balance(): before shift calculation..." << std::endl; +#endif // calculate shift of borders: // s = 0.5 * gamma * (W_r - W_l) / (W_r + W_l) * (d_r + d_l) // *_r = * of neighbor (remote) // *_l = * of local domain T shift = (T)0; - if (rank_right != MPI_PROC_NULL && local_coords[i] != global_dims[i]-1 && !( remote_work == (T)0 && work_local_plane == (T)0) ) - shift = 1.0 / gamma * 0.5 * (remote_work - work_local_plane) / (remote_work + work_local_plane) * (size + remote_size); + if (rank_right != MPI_PROC_NULL && this->local_coords[i] != this->global_dims[i]-1 && !( remote_work == (T)0 && work_local_plane == (T)0) ) + shift = 1.0 / this->get_gamma() * 0.5 * (remote_work - work_local_plane) / (remote_work + work_local_plane) * (local_size + remote_size); // reimplemented check if move is too large - // in theory should be unneeded due to automatic choice of gamma - T maxmove = 0.4 * std::min(size,remote_size); - maxmove = std::min(0.5*std::min(size-min_size[i],remote_size-min_size[i]),maxmove); + + // determine a maximum move in order to avoid overlapping borders and to guarantee + // a stable shift of the domain borders (avoid the loss of a domain due to too large + // shifts of its borders [no crossing of borders]) + T maxmove; + if (shift > 0.0) + maxmove = 0.49*(remote_size-this->min_size[i]); + else + maxmove = 0.49*(local_size-this->min_size[i]); if (std::fabs(shift) > maxmove) { shift = (shift < 0.0)?-maxmove:maxmove; } +#ifdef ALL_DEBUG_ENABLED + MPI_Barrier(this->global_comm); + if( this->local_rank == 0 ) std::cout << "ALL_Staggered::balance(): before shift distribution..."
<< std::endl; +#endif // send shift to right neighbors T remote_shift = (T)0; - MPI_Irecv(&remote_shift,1,mpi_data_type_T,rank_left,0,global_comm,&rreq); - MPI_Isend(&shift,1,mpi_data_type_T,rank_right,0,global_comm,&sreq); + MPI_Irecv(&remote_shift,1,mpi_data_type_T,rank_left,0,this->global_comm,&rreq); + MPI_Isend(&shift,1,mpi_data_type_T,rank_right,0,this->global_comm,&sreq); MPI_Wait(&sreq,&sstat); MPI_Wait(&rreq,&rstat); // for now: test case for simple program // if a left neighbor exists: shift left border - if (rank_left != MPI_PROC_NULL && local_coords[i] != 0) - shifted_vertices.at(i) = vertices.at(i) + remote_shift; + // if a left neighbor exists: shift left border + if (rank_left != MPI_PROC_NULL && this->local_coords[i] != 0) + this->shifted_vertices.at(0).set_coordinate(i, this->vertices.at(0).x(i) + remote_shift); else - shifted_vertices.at(i) = vertices.at(i); + this->shifted_vertices.at(0).set_coordinate(i, this->vertices.at(0).x(i)); // if a right neighbor exists: shift right border - if (rank_right != MPI_PROC_NULL && local_coords[i] != global_dims[i]-1) - shifted_vertices.at(dimension+i) = vertices.at(dimension+i) + shift; + if (rank_right != MPI_PROC_NULL && this->local_coords[i] != this->global_dims[i]-1) + this->shifted_vertices.at(1).set_coordinate(i, this->vertices.at(1).x(i) + shift); else - shifted_vertices.at(dimension+i) = vertices.at(dimension+i); + this->shifted_vertices.at(1).set_coordinate(i, this->vertices.at(1).x(i)); - // check if vertices are crossed and throw exception if thing went wrong - if (shifted_vertices.at(dimension+i) < shifted_vertices.at(i)) + + // check if vertices are crossed and throw exception if something went wrong + if (this->shifted_vertices.at(1).x(i) < this->shifted_vertices.at(0).x(i)) { - std::cout << "ERROR on process: " << local_rank << " " - << vertices.at(i) << " " - << vertices.at(dimension+i) << " " - << shifted_vertices.at(i) << " " - << shifted_vertices.at(dimension+i) << " " + std::cout << "ERROR on process: " << this->local_rank << " " + << this->vertices.at(0).x(i) << " " + << this->vertices.at(1).x(i) << " " + << this->shifted_vertices.at(0).x(i) << " " + << this->shifted_vertices.at(1).x(i) << " " << remote_shift << " " << shift << " " << remote_size << " " - << size << " " + << local_size << " " << std::fabs(maxmove) << " " << maxmove << " " << std::endl; @@ -384,19 +313,28 @@ template<class T, class W> void ALL_Staggered_LB<T,W>::balance() "Lower border of process larger than upper border of process!"); } +#ifdef ALL_DEBUG_ENABLED + MPI_Barrier(this->global_comm); + if( this->local_rank == 0 ) std::cout << "ALL_Staggered::balance(): before neighbor search..."
<< std::endl; +#endif find_neighbors(); } } template<class T, class W> void ALL_Staggered_LB<T,W>::find_neighbors() { + int dimension = this->get_dimension(); + auto work = this->get_work(); + auto vertices = this->get_vertices(); + auto shifted_vertices = this->get_shifted_vertices(); + neighbors.clear(); // collect work from right neighbor plane MPI_Request sreq, rreq; MPI_Status sstat, rstat; // array to store neighbor vertices in Y/Z direction (reused) T* vertices_loc = new T[4]; - T* vertices_rem = new T[8*global_dims[0]*global_dims[1]]; + T* vertices_rem = new T[8*this->global_dims[0]*this->global_dims[1]]; int rem_rank; int rem_coords[3]; @@ -409,44 +347,44 @@ template<class T, class W> void ALL_Staggered_LB<T,W>::find_neighbors() int offset_coords[3]; // X-neighbors are static - n_neighbors->at(0) = n_neighbors->at(1) = 1; + n_neighbors.at(0) = n_neighbors.at(1) = 1; // find X-neighbors - MPI_Cart_shift(global_comm,0,1,&rank_left,&rank_right); + MPI_Cart_shift(this->global_comm,0,1,&rank_left,&rank_right); // store X-neighbors neighbors.push_back(rank_left); neighbors.push_back(rank_right); // find Y-neighbors to get border information from - MPI_Cart_shift(global_comm,1,1,&rank_left,&rank_right); + MPI_Cart_shift(this->global_comm,1,1,&rank_left,&rank_right); // collect border information from local column - vertices_loc[0] = shifted_vertices.at(0); - vertices_loc[1] = shifted_vertices.at(dimension); - MPI_Allgather(vertices_loc,2,mpi_data_type_T,vertices_rem+2*global_dims[0],2,mpi_data_type_T,communicators[1]); + vertices_loc[0] = shifted_vertices.at(0).x(0); + vertices_loc[1] = shifted_vertices.at(1).x(0); + MPI_Allgather(vertices_loc,2,mpi_data_type_T,vertices_rem+2*this->global_dims[0],2,mpi_data_type_T,communicators[1]); // exchange local column information with upper neighbor in Y direction (cart grid) - MPI_Irecv(vertices_rem ,2*global_dims[0],mpi_data_type_T,rank_left, 0,global_comm,&rreq); - MPI_Isend(vertices_rem+2*global_dims[0],2*global_dims[0],mpi_data_type_T,rank_right,0,global_comm,&sreq); + MPI_Irecv(vertices_rem ,2*this->global_dims[0],mpi_data_type_T,rank_left, 0,this->global_comm,&rreq); + MPI_Isend(vertices_rem+2*this->global_dims[0],2*this->global_dims[0],mpi_data_type_T,rank_right,0,this->global_comm,&sreq); // determine the offset in ranks offset_coords[0] = 0; - offset_coords[1] = local_coords[1] - 1; - offset_coords[2] = local_coords[2]; + offset_coords[1] = this->local_coords[1] - 1; + offset_coords[2] = this->local_coords[2]; rem_coords[1] = offset_coords[1]; rem_coords[2] = offset_coords[2]; - MPI_Cart_rank(global_comm,offset_coords,&rank_offset); + MPI_Cart_rank(this->global_comm,offset_coords,&rank_offset); // wait for communication MPI_Wait(&sreq,&sstat); MPI_Wait(&rreq,&rstat); // iterate about neighbor borders to determine the neighborship relation - n_neighbors->at(2) = 0; - for (int x = 0; x < global_dims[0]; ++x) + n_neighbors.at(2) = 0; + for (int x = 0; x < this->global_dims[0]; ++x) { if ( ( vertices_rem[2*x] <= vertices_loc[0] && vertices_loc[0] < vertices_rem[2*x+1] ) || @@ -455,37 +393,37 @@ template<class T, class W> void ALL_Staggered_LB<T,W>::find_neighbors() vertices_loc[1] >= vertices_rem[2*x+1] ) ) { - n_neighbors->at(2)++; + n_neighbors.at(2)++; rem_coords[0] = x; - MPI_Cart_rank(global_comm,rem_coords,&rem_rank); + MPI_Cart_rank(this->global_comm,rem_coords,&rem_rank); neighbors.push_back(rem_rank); } } // barrier to ensure every process concluded the calculations before overwriting remote borders! 
- MPI_Barrier(global_comm); + MPI_Barrier(this->global_comm); // exchange local column information with lower neighbor in Y direction (cart grid) - MPI_Irecv(vertices_rem ,2*global_dims[0],mpi_data_type_T,rank_right, 0,global_comm,&rreq); - MPI_Isend(vertices_rem+2*global_dims[0],2*global_dims[0],mpi_data_type_T,rank_left, 0,global_comm,&sreq); + MPI_Irecv(vertices_rem ,2*this->global_dims[0],mpi_data_type_T,rank_right, 0,this->global_comm,&rreq); + MPI_Isend(vertices_rem+2*this->global_dims[0],2*this->global_dims[0],mpi_data_type_T,rank_left, 0,this->global_comm,&sreq); // determine the offset in ranks offset_coords[0] = 0; - offset_coords[1] = local_coords[1] + 1; - offset_coords[2] = local_coords[2]; + offset_coords[1] = this->local_coords[1] + 1; + offset_coords[2] = this->local_coords[2]; rem_coords[1] = offset_coords[1]; rem_coords[2] = offset_coords[2]; - MPI_Cart_rank(global_comm,offset_coords,&rank_offset); + MPI_Cart_rank(this->global_comm,offset_coords,&rank_offset); // wait for communication MPI_Wait(&sreq,&sstat); MPI_Wait(&rreq,&rstat); // iterate about neighbor borders to determine the neighborship relation - n_neighbors->at(3) = 0; - for (int x = 0; x < global_dims[0]; ++x) + n_neighbors.at(3) = 0; + for (int x = 0; x < this->global_dims[0]; ++x) { if ( ( vertices_rem[2*x] <= vertices_loc[0] && vertices_loc[0] < vertices_rem[2*x+1] ) || @@ -494,41 +432,41 @@ template<class T, class W> void ALL_Staggered_LB<T,W>::find_neighbors() vertices_loc[1] >= vertices_rem[2*x+1] ) ) { - n_neighbors->at(3)++; + n_neighbors.at(3)++; rem_coords[0] = x; - MPI_Cart_rank(global_comm,rem_coords,&rem_rank); + MPI_Cart_rank(this->global_comm,rem_coords,&rem_rank); neighbors.push_back(rem_rank); } } // barrier to ensure every process concluded the calculations before overwriting remote borders! 
- MPI_Barrier(global_comm); + MPI_Barrier(this->global_comm); // find Z-neighbors to get border information from - MPI_Cart_shift(global_comm,2,1,&rank_left,&rank_right); + MPI_Cart_shift(this->global_comm,2,1,&rank_left,&rank_right); // collect border information from local column - vertices_loc[0] = shifted_vertices.at(0); - vertices_loc[1] = shifted_vertices.at(dimension); - vertices_loc[2] = shifted_vertices.at(1); - vertices_loc[3] = shifted_vertices.at(1+dimension); + vertices_loc[0] = shifted_vertices.at(0).x(0); + vertices_loc[1] = shifted_vertices.at(1).x(0); + vertices_loc[2] = shifted_vertices.at(0).x(1); + vertices_loc[3] = shifted_vertices.at(1).x(1); - MPI_Barrier(global_comm); + MPI_Barrier(this->global_comm); - MPI_Allgather(vertices_loc,4,mpi_data_type_T,vertices_rem+4*global_dims[0]*global_dims[1],4,mpi_data_type_T,communicators[2]); + MPI_Allgather(vertices_loc,4,mpi_data_type_T,vertices_rem+4*this->global_dims[0]*this->global_dims[1],4,mpi_data_type_T,communicators[2]); // exchange local column information with upper neighbor in Z direction (cart grid) - MPI_Irecv(vertices_rem ,4*global_dims[0]*global_dims[1],mpi_data_type_T,rank_left, 0,global_comm,&rreq); - MPI_Isend(vertices_rem+4*global_dims[0]*global_dims[1],4*global_dims[0]*global_dims[1],mpi_data_type_T,rank_right,0,global_comm,&sreq); + MPI_Irecv(vertices_rem ,4*this->global_dims[0]*this->global_dims[1],mpi_data_type_T,rank_left, 0,this->global_comm,&rreq); + MPI_Isend(vertices_rem+4*this->global_dims[0]*this->global_dims[1],4*this->global_dims[0]*this->global_dims[1],mpi_data_type_T,rank_right,0,this->global_comm,&sreq); // determine the offset in ranks offset_coords[0] = 0; offset_coords[1] = 0; - offset_coords[2] = local_coords[2]-1; + offset_coords[2] = this->local_coords[2]-1; rem_coords[2] = offset_coords[2]; - MPI_Cart_rank(global_comm,offset_coords,&rank_offset); + MPI_Cart_rank(this->global_comm,offset_coords,&rank_offset); // wait for communication MPI_Wait(&sreq,&sstat); @@ -536,91 +474,91 @@ template<class T, class W> void ALL_Staggered_LB<T,W>::find_neighbors() // iterate about neighbor borders to determine the neighborship relation - n_neighbors->at(4) = 0; - for (int y = 0; y < global_dims[1]; ++y) + n_neighbors.at(4) = 0; + for (int y = 0; y < this->global_dims[1]; ++y) { - for (int x = 0; x < global_dims[0]; ++x) + for (int x = 0; x < this->global_dims[0]; ++x) { if ( ( - ( vertices_rem[4*(x+y*global_dims[0])+2] <= vertices_loc[2] && vertices_loc[2] < vertices_rem[4*(x+y*global_dims[0])+3] ) || - ( vertices_rem[4*(x+y*global_dims[0])+2] < vertices_loc[3] && vertices_loc[3] <= vertices_rem[4*(x+y*global_dims[0])+3] ) || - ( vertices_rem[4*(x+y*global_dims[0])+2] >= vertices_loc[2] && vertices_loc[2] < vertices_rem[4*(x+y*global_dims[0])+3] && - vertices_loc[3] >= vertices_rem[4*(x+y*global_dims[0])+3] ) + ( vertices_rem[4*(x+y*this->global_dims[0])+2] <= vertices_loc[2] && vertices_loc[2] < vertices_rem[4*(x+y*this->global_dims[0])+3] ) || + ( vertices_rem[4*(x+y*this->global_dims[0])+2] < vertices_loc[3] && vertices_loc[3] <= vertices_rem[4*(x+y*this->global_dims[0])+3] ) || + ( vertices_rem[4*(x+y*this->global_dims[0])+2] >= vertices_loc[2] && vertices_loc[2] < vertices_rem[4*(x+y*this->global_dims[0])+3] && + vertices_loc[3] >= vertices_rem[4*(x+y*this->global_dims[0])+3] ) ) ) if ( ( - ( vertices_rem[4*(x+y*global_dims[0])] <= vertices_loc[0] && vertices_loc[0] < vertices_rem[4*(x+y*global_dims[0])+1] ) || - ( vertices_rem[4*(x+y*global_dims[0])] < vertices_loc[1] && vertices_loc[1] <= 
vertices_rem[4*(x+y*global_dims[0])+1] ) || - ( vertices_rem[4*(x+y*global_dims[0])] >= vertices_loc[0] && vertices_loc[0] < vertices_rem[4*(x+y*global_dims[0])+1] && - vertices_loc[1] >= vertices_rem[4*(x+y*global_dims[0])+1] ) + ( vertices_rem[4*(x+y*this->global_dims[0])] <= vertices_loc[0] && vertices_loc[0] < vertices_rem[4*(x+y*this->global_dims[0])+1] ) || + ( vertices_rem[4*(x+y*this->global_dims[0])] < vertices_loc[1] && vertices_loc[1] <= vertices_rem[4*(x+y*this->global_dims[0])+1] ) || + ( vertices_rem[4*(x+y*this->global_dims[0])] >= vertices_loc[0] && vertices_loc[0] < vertices_rem[4*(x+y*this->global_dims[0])+1] && + vertices_loc[1] >= vertices_rem[4*(x+y*this->global_dims[0])+1] ) ) ) { - n_neighbors->at(4)++; + n_neighbors.at(4)++; rem_coords[1] = y; rem_coords[0] = x; - MPI_Cart_rank(global_comm,rem_coords,&rem_rank); + MPI_Cart_rank(this->global_comm,rem_coords,&rem_rank); neighbors.push_back(rem_rank); } } } // barrier to ensure every process concluded the calculations before overwriting remote borders! - MPI_Barrier(global_comm); + MPI_Barrier(this->global_comm); // exchange local column information with upper neighbor in Y direction (cart grid) - MPI_Irecv(vertices_rem ,4*global_dims[0]*global_dims[1],mpi_data_type_T,rank_right, 0,global_comm,&rreq); - MPI_Isend(vertices_rem+4*global_dims[0]*global_dims[1],4*global_dims[0]*global_dims[1],mpi_data_type_T,rank_left,0,global_comm,&sreq); + MPI_Irecv(vertices_rem ,4*this->global_dims[0]*this->global_dims[1],mpi_data_type_T,rank_right, 0,this->global_comm,&rreq); + MPI_Isend(vertices_rem+4*this->global_dims[0]*this->global_dims[1],4*this->global_dims[0]*this->global_dims[1],mpi_data_type_T,rank_left,0,this->global_comm,&sreq); // determine the offset in ranks offset_coords[0] = 0; offset_coords[1] = 0; - offset_coords[2] = local_coords[2]+1; + offset_coords[2] = this->local_coords[2]+1; rem_coords[2] = offset_coords[2]; - MPI_Cart_rank(global_comm,offset_coords,&rank_offset); + MPI_Cart_rank(this->global_comm,offset_coords,&rank_offset); // wait for communication MPI_Wait(&sreq,&sstat); MPI_Wait(&rreq,&rstat); // iterate about neighbor borders to determine the neighborship relation - n_neighbors->at(5) = 0; - for (int y = 0; y < global_dims[1]; ++y) + n_neighbors.at(5) = 0; + for (int y = 0; y < this->global_dims[1]; ++y) { - for (int x = 0; x < global_dims[0]; ++x) + for (int x = 0; x < this->global_dims[0]; ++x) { if ( ( - ( vertices_rem[4*(x+y*global_dims[0])+2] <= vertices_loc[2] && vertices_loc[2] < vertices_rem[4*(x+y*global_dims[0])+3] ) || - ( vertices_rem[4*(x+y*global_dims[0])+2] < vertices_loc[3] && vertices_loc[3] <= vertices_rem[4*(x+y*global_dims[0])+3] ) || - ( vertices_rem[4*(x+y*global_dims[0])+2] >= vertices_loc[2] && vertices_loc[2] < vertices_rem[4*(x+y*global_dims[0])+3] && - vertices_loc[3] >= vertices_rem[4*(x+y*global_dims[0])+3] ) + ( vertices_rem[4*(x+y*this->global_dims[0])+2] <= vertices_loc[2] && vertices_loc[2] < vertices_rem[4*(x+y*this->global_dims[0])+3] ) || + ( vertices_rem[4*(x+y*this->global_dims[0])+2] < vertices_loc[3] && vertices_loc[3] <= vertices_rem[4*(x+y*this->global_dims[0])+3] ) || + ( vertices_rem[4*(x+y*this->global_dims[0])+2] >= vertices_loc[2] && vertices_loc[2] < vertices_rem[4*(x+y*this->global_dims[0])+3] && + vertices_loc[3] >= vertices_rem[4*(x+y*this->global_dims[0])+3] ) ) ) if ( ( - ( vertices_rem[4*(x+y*global_dims[0])] <= vertices_loc[0] && vertices_loc[0] < vertices_rem[4*(x+y*global_dims[0])+1] ) || - ( vertices_rem[4*(x+y*global_dims[0])] < vertices_loc[1] && 
vertices_loc[1] <= vertices_rem[4*(x+y*global_dims[0])+1] ) || - ( vertices_rem[4*(x+y*global_dims[0])] >= vertices_loc[0] && vertices_loc[0] < vertices_rem[4*(x+y*global_dims[0])+1] && - vertices_loc[1] >= vertices_rem[4*(x+y*global_dims[0])+1] ) + ( vertices_rem[4*(x+y*this->global_dims[0])] <= vertices_loc[0] && vertices_loc[0] < vertices_rem[4*(x+y*this->global_dims[0])+1] ) || + ( vertices_rem[4*(x+y*this->global_dims[0])] < vertices_loc[1] && vertices_loc[1] <= vertices_rem[4*(x+y*this->global_dims[0])+1] ) || + ( vertices_rem[4*(x+y*this->global_dims[0])] >= vertices_loc[0] && vertices_loc[0] < vertices_rem[4*(x+y*this->global_dims[0])+1] && + vertices_loc[1] >= vertices_rem[4*(x+y*this->global_dims[0])+1] ) ) ) { - n_neighbors->at(5)++; + n_neighbors.at(5)++; rem_coords[1] = y; rem_coords[0] = x; - MPI_Cart_rank(global_comm,rem_coords,&rem_rank); + MPI_Cart_rank(this->global_comm,rem_coords,&rem_rank); neighbors.push_back(rem_rank); } } } // barrier to ensure every process concluded the calculations before overwriting remote borders! - MPI_Barrier(global_comm); + MPI_Barrier(this->global_comm); // clear up vertices array delete[] vertices_loc; @@ -635,7 +573,7 @@ template<class T, class W> void ALL_Staggered_LB<T,W>::get_neighbors(std::vector // provide list of neighbors in each direction template<class T, class W> void ALL_Staggered_LB<T,W>::get_neighbors(int** ret) { - *ret = n_neighbors->data(); + *ret = n_neighbors.data(); } #endif diff --git a/include/ALL_Tensor.hpp b/include/ALL_Tensor.hpp index d8f333f2eae03465b9529917da57bd55c95060d7..192de8ebc643b5d15c1a5b7aaabc871917760ae0 100644 --- a/include/ALL_Tensor.hpp +++ b/include/ALL_Tensor.hpp @@ -34,159 +34,73 @@ POSSIBILITY OF SUCH DAMAGE. #include <mpi.h> #include <exception> #include "ALL_CustomExceptions.hpp" +#include "ALL_LB.hpp" // T: data type of vertices used in the balancer // W: data type of the work used in the balancer -template <class T, class W> class ALL_Tensor_LB +template <class T, class W> class ALL_Tensor_LB : public ALL_LB<T,W> { public: ALL_Tensor_LB() {} - ALL_Tensor_LB(int d, W w, T g) : dimension(d), - work(w), - gamma(g) + ALL_Tensor_LB(int d, W w, T g) : + ALL_LB<T,W>(d, g) { + this->set_work(w); // need the lower and upper bounds // of the domain for the tensor-based // load-balancing scheme - vertices = new T[2*dimension]; - shifted_vertices = new T[2*dimension]; - // global dimension of the system in each coordinate - global_dims = new int[dimension]; - // coordinates of the local domain in the system - local_coords = new int[dimension]; - // periodicity of the MPI communicator / system - periodicity = new int[dimension]; // array of MPI communicators for each direction (to collect work // on each plane) - communicators = new MPI_Comm[dimension]; - n_neighbors = new int[2*dimension]; + communicators.resize(d); + n_neighbors.resize(2*d); } - ~ALL_Tensor_LB(); - - void set_vertices(T*); + ~ALL_Tensor_LB() = default; // setup communicators for the exchange of work in each domain - void setup(MPI_Comm); + void setup() override; // do a load-balancing step - virtual void balance(); + virtual void balance(int step) override; // getter for variables (passed by reference to avoid // direct access to private members of the object) - // getter for base vertices - void get_vertices(T*); - // getter for shifted vertices - void get_shifted_vertices(T*); - // getter for shifted vertices - void get_shifted_vertices(std::vector<T>&); - // neighbors // provide list of neighbors - virtual void 
get_neighbors(std::vector<int>&); + void get_neighbors(std::vector<int>&) override; // provide list of neighbors in each direction - virtual void get_neighbors(int**); - + void get_neighbors(int**) override; + // empty override for setting method specific data + virtual void set_data(void*) override {} private: - // dimension of the system - int dimension; - - // shift cooeficient - T gamma; - - // work per domain - W work; - - // vertices - T* vertices; - - // vertices after load-balancing shift - T* shifted_vertices; - - // MPI values - // global communicator - MPI_Comm global_comm; - // rank of the local domain in the communicator - int local_rank; - // global dimension of the system in each coordinate - int* global_dims; - // coordinates of the local domain in the system - int* local_coords; - // periodicity of the MPI communicator / system - int* periodicity; - // type for MPI communication MPI_Datatype mpi_data_type_T; MPI_Datatype mpi_data_type_W; // array of MPI communicators for each direction (to collect work // on each plane) - MPI_Comm* communicators; + std::vector<MPI_Comm> communicators; // list of neighbors std::vector<int> neighbors; - int* n_neighbors; + std::vector<int> n_neighbors; }; -template <class T, class W> ALL_Tensor_LB<T,W>::~ALL_Tensor_LB() -{ - if (vertices) delete vertices; - if (shifted_vertices) delete shifted_vertices; - if (global_dims) delete global_dims; - if (local_coords) delete local_coords; - if (periodicity) delete periodicity; - if (communicators) delete communicators; - if (n_neighbors) delete n_neighbors; -} - -template <class T, class W> void ALL_Tensor_LB<T,W>::get_vertices(T* result) -{ - for (int i = 0; i < 2*dimension; ++i) - { - result[i] = vertices[i]; - } -} - -template <class T, class W> void ALL_Tensor_LB<T,W>::get_shifted_vertices(T* result) -{ - for (int i = 0; i < 2*dimension; ++i) - { - result[i] = shifted_vertices[i]; - } -} - -template <class T, class W> void ALL_Tensor_LB<T,W>::get_shifted_vertices(std::vector<T>& result) -{ - for (int i = 0; i < 2*dimension; ++i) - { - result.at(i) = shifted_vertices[i]; - } -} - -// set the actual vertices (unsafe due to array copy) -template <class T, class W> void ALL_Tensor_LB<T,W>::set_vertices(T* v) -{ - for (int i = 0; i < 2*dimension; ++i) - { - vertices[i] = v[i]; - } -} - // setup routine for the tensor-based load-balancing scheme // requires: -// global_comm (int): cartesian MPI communicator, from +// this->global_comm (int): cartesian MPI communicator, from // which separate sub communicators // are derived in order to represent // each plane of domains in the system -template <class T, class W> void ALL_Tensor_LB<T,W>::setup(MPI_Comm comm) +template <class T, class W> void ALL_Tensor_LB<T,W>::setup() { int status; // check if Communicator is cartesian - MPI_Topo_test(comm, &status); + MPI_Topo_test(this->global_comm, &status); if (status != MPI_CART) { throw ALL_Invalid_Comm_Type_Exception( @@ -195,25 +109,19 @@ template <class T, class W> void ALL_Tensor_LB<T,W>::setup(MPI_Comm comm) __LINE__, "Cartesian MPI communicator required, passed communicator not cartesian"); } - - // allocate arrays to correct sizes - global_dims = new int[dimension]; - local_coords = new int[dimension]; - periodicity = new int[dimension]; - - // store MPI communicator to local variable - global_comm = comm; + int dim = this->get_dimension(); + // get the local coordinates, periodicity and global size from the MPI communicator - MPI_Cart_get(global_comm, dimension, global_dims, periodicity, local_coords); 
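
Note on the hunk above: the per-balancer state that used to be allocated with new[] now lives in std::vector members of the shared ALL_LB base, so the topology queries write through .data(), and setup() guards against non-Cartesian communicators with MPI_Topo_test. A minimal, stand-alone MPI program using the same calls (no ALL types involved; the grid construction exists only to make the example runnable):

```cpp
#include <mpi.h>
#include <vector>
#include <iostream>

int main(int argc, char** argv)
{
    MPI_Init(&argc, &argv);

    int n_ranks;
    MPI_Comm_size(MPI_COMM_WORLD, &n_ranks);

    // build a 3D periodic Cartesian communicator; MPI_Dims_create picks a balanced grid
    const int dim = 3;
    std::vector<int> dims(dim, 0), periods(dim, 1);
    MPI_Dims_create(n_ranks, dim, dims.data());

    MPI_Comm cart_comm;
    MPI_Cart_create(MPI_COMM_WORLD, dim, dims.data(), periods.data(), 0, &cart_comm);

    // the balancers require a Cartesian topology, so guard with MPI_Topo_test
    int status;
    MPI_Topo_test(cart_comm, &status);
    if (status != MPI_CART) MPI_Abort(MPI_COMM_WORLD, 1);

    // query grid extents, periodicity and local coordinates straight into vector storage
    std::vector<int> global_dims(dim), periodicity(dim), local_coords(dim);
    MPI_Cart_get(cart_comm, dim, global_dims.data(), periodicity.data(), local_coords.data());

    int local_rank;
    MPI_Cart_rank(cart_comm, local_coords.data(), &local_rank);

    if (local_rank == 0)
        std::cout << "process grid: " << global_dims[0] << " x "
                  << global_dims[1] << " x " << global_dims[2] << std::endl;

    MPI_Comm_free(&cart_comm);
    MPI_Finalize();
    return 0;
}
```
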
+ MPI_Cart_get(this->global_comm, dim, this->global_dims.data(), this->periodicity.data(), this->local_coords.data()); // get the local rank from the MPI communicator - MPI_Cart_rank(global_comm, local_coords, &local_rank); + MPI_Cart_rank(this->global_comm, this->local_coords.data(), &this->local_rank); // create sub-communicators - for (int i = 0; i < dimension; ++i) + for (int i = 0; i < dim; ++i) { - MPI_Comm_split(global_comm,local_coords[i],0,&communicators[i]); + MPI_Comm_split(this->global_comm,this->local_coords.at(i),0,&communicators[i]); } // determine correct MPI data type for template T @@ -232,9 +140,9 @@ template <class T, class W> void ALL_Tensor_LB<T,W>::setup(MPI_Comm comm) int rank_left, rank_right; neighbors.clear(); - for (int i = 0; i < dimension; ++i) + for (int i = 0; i < dim; ++i) { - MPI_Cart_shift(global_comm,i,1,&rank_left,&rank_right); + MPI_Cart_shift(this->global_comm,i,1,&rank_left,&rank_right); neighbors.push_back(rank_left); neighbors.push_back(rank_right); n_neighbors[2*i] = 1; @@ -243,40 +151,44 @@ template <class T, class W> void ALL_Tensor_LB<T,W>::setup(MPI_Comm comm) } -template<class T, class W> void ALL_Tensor_LB<T,W>::balance() +template<class T, class W> void ALL_Tensor_LB<T,W>::balance(int) { + int dim = this->get_dimension(); + std::vector<ALL_Point<T>>& vertices = this->get_vertices(); + std::vector<ALL_Point<T>>& shifted_vertices = this->get_shifted_vertices(); + // loop over all available dimensions - for (int i = 0; i < dimension; ++i) + for (int i = 0; i < dim; ++i) { W work_local_plane; // collect work from all processes in the same plane - MPI_Allreduce(&work,&work_local_plane,1,mpi_data_type_W,MPI_SUM,communicators[i]); + MPI_Allreduce(this->get_work().data(),&work_local_plane,1,mpi_data_type_W,MPI_SUM,communicators.at(i)); // correct right border: W remote_work; - T size; + T local_size; T remote_size; // determine neighbors int rank_left, rank_right; - MPI_Cart_shift(global_comm,i,1,&rank_left,&rank_right); + MPI_Cart_shift(this->global_comm,i,1,&rank_left,&rank_right); // collect work from right neighbor plane MPI_Request sreq, rreq; MPI_Status sstat, rstat; - MPI_Irecv(&remote_work,1,mpi_data_type_W,rank_right,0,global_comm,&rreq); - MPI_Isend(&work_local_plane,1,mpi_data_type_W,rank_left,0,global_comm,&sreq); + MPI_Irecv(&remote_work,1,mpi_data_type_W,rank_right,0,this->global_comm,&rreq); + MPI_Isend(&work_local_plane,1,mpi_data_type_W,rank_left,0,this->global_comm,&sreq); MPI_Wait(&sreq,&sstat); MPI_Wait(&rreq,&rstat); // collect size in dimension from right neighbor plane - size = vertices[dimension+i] - vertices[i]; + local_size = vertices.at(1).x(i) - vertices.at(0).x(i); - MPI_Irecv(&remote_size,1,mpi_data_type_T,rank_right,0,global_comm,&rreq); - MPI_Isend(&size,1,mpi_data_type_T,rank_left,0,global_comm,&sreq); + MPI_Irecv(&remote_size,1,mpi_data_type_T,rank_right,0,this->global_comm,&rreq); + MPI_Isend(&local_size,1,mpi_data_type_T,rank_left,0,this->global_comm,&sreq); MPI_Wait(&sreq,&sstat); MPI_Wait(&rreq,&rstat); @@ -285,41 +197,42 @@ template<class T, class W> void ALL_Tensor_LB<T,W>::balance() // *_r = * of neighbor (remote) // *_l = * of local domain T shift = (T)0; - if (rank_right != MPI_PROC_NULL && local_coords[i] != global_dims[i]-1 && !( remote_work == (T)0 && work_local_plane == (T)0) ) - shift = 1.0 / gamma * 0.5 * (remote_work - work_local_plane) / (remote_work + work_local_plane) * (size + remote_size); - - if (shift < 0.0) + if (rank_right != MPI_PROC_NULL && this->local_coords.at(i) != 
this->global_dims.at(i)-1 && !( remote_work == (T)0 && work_local_plane == (T)0) ) + shift = 1.0 / this->get_gamma() * 0.5 * (remote_work - work_local_plane) / (remote_work + work_local_plane) * (local_size + remote_size); + + // determine a maximum move in order to avoid overlapping borders and to guarantee + // a stable shift of the domain borders (avoid the loss of a domain due to too large + // shifts of its borders [no crossing of borders]) + T maxmove; + if (shift > 0.0) + maxmove = 0.49*(remote_size-this->min_size[i]); + else + maxmove = 0.49*(local_size-this->min_size[i]); + + if (std::fabs(shift) > maxmove) { - if (abs(shift) > 0.45 * size) shift = -0.45 * size; + shift = (shift < 0.0)?-maxmove:maxmove; } - else - { - if (abs(shift) > 0.45 * remote_size) shift = 0.45 * remote_size; - } - // send shift to right neighbors T remote_shift = (T)0; - MPI_Irecv(&remote_shift,1,mpi_data_type_T,rank_left,0,global_comm,&rreq); - MPI_Isend(&shift,1,mpi_data_type_T,rank_right,0,global_comm,&sreq); + MPI_Irecv(&remote_shift,1,mpi_data_type_T,rank_left,0,this->global_comm,&rreq); + MPI_Isend(&shift,1,mpi_data_type_T,rank_right,0,this->global_comm,&sreq); MPI_Wait(&sreq,&sstat); MPI_Wait(&rreq,&rstat); - // for now: test case for simple program - // if a left neighbor exists: shift left border - if (rank_left != MPI_PROC_NULL && local_coords[i] != 0) - shifted_vertices[i] = vertices[i] + remote_shift; + if (rank_left != MPI_PROC_NULL && this->local_coords[i] != 0) + this->shifted_vertices.at(0).set_coordinate(i, vertices.at(0).x(i) + remote_shift); else - shifted_vertices[i] = vertices[i]; + this->shifted_vertices.at(0).set_coordinate(i, vertices.at(0).x(i)); // if a right neighbor exists: shift right border - if (rank_right != MPI_PROC_NULL && local_coords[i] != global_dims[i]-1) - shifted_vertices[dimension+i] = vertices[dimension+i] + shift; + if (rank_right != MPI_PROC_NULL && this->local_coords[i] != this->global_dims[i]-1) + this->shifted_vertices.at(1).set_coordinate(i, vertices.at(1).x(i) + shift); else - shifted_vertices[dimension+i] = vertices[dimension+i]; - + this->shifted_vertices.at(1).set_coordinate(i, vertices.at(1).x(i)); } } @@ -331,6 +244,6 @@ template<class T, class W> void ALL_Tensor_LB<T,W>::get_neighbors(std::vector<in // provide list of neighbors in each direction template<class T, class W> void ALL_Tensor_LB<T,W>::get_neighbors(int** ret) { - *ret = n_neighbors; + *ret = n_neighbors.data(); } #endif diff --git a/include/ALL_Unstructured.hpp b/include/ALL_Unstructured.hpp index f6797558361ad1efaec0af6fa450ead8096b8f1a..76cb14a327bdcababa26b1d08451d8ccb5a81fe7 100644 --- a/include/ALL_Unstructured.hpp +++ b/include/ALL_Unstructured.hpp @@ -67,103 +67,57 @@ POSSIBILITY OF SUCH DAMAGE. 
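
Before the unstructured scheme below, a remark on the tensor balance step just shown: the fixed 0.45*size cap is replaced by a limit derived from the configured minimum domain width. A stand-alone sketch of the shift rule and its clamp (boundary handling and all MPI traffic are dropped; the function name and the worked numbers are illustrative only):

```cpp
#include <cmath>
#include <iostream>

// Sketch of the plane-wise border shift used by the tensor scheme:
//   shift = (1/gamma) * 0.5 * (W_remote - W_local) / (W_remote + W_local) * (d_local + d_remote)
// clamped so that neither domain can shrink below min_size.
double clamped_shift(double w_local, double w_remote,
                     double d_local, double d_remote,
                     double gamma, double min_size)
{
    double shift = 0.0;
    if (w_local + w_remote > 0.0)
        shift = 0.5 / gamma * (w_remote - w_local) / (w_remote + w_local)
              * (d_local + d_remote);

    // a positive shift moves the border into the remote domain, a negative one
    // into the local domain; limit the move so the shrinking side keeps min_size
    double maxmove = (shift > 0.0) ? 0.49 * (d_remote - min_size)
                                   : 0.49 * (d_local  - min_size);
    if (maxmove < 0.0) maxmove = 0.0;   // domain already at its minimum width
    if (std::fabs(shift) > maxmove)
        shift = (shift < 0.0) ? -maxmove : maxmove;
    return shift;
}

int main()
{
    // neighbor carries twice the work: the border moves toward it by about 0.083,
    // well inside maxmove = 0.49 * (1.0 - 0.5) = 0.245
    std::cout << clamped_shift(100.0, 200.0, 1.0, 1.0, 4.0, 0.5) << std::endl;
    return 0;
}
```

Tying the cap to min_size rather than to a fraction of the current width keeps already thin domains from being squeezed further and rules out border crossings.
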
#include <cmath> #include "ALL_CustomExceptions.hpp" #include "ALL_Point.hpp" +#include "ALL_LB.hpp" // T: data type of vertices used in the balancer // W: data type of the work used in the balancer -template <class T, class W> class ALL_Unstructured_LB +template <class T, class W> class ALL_Unstructured_LB : public ALL_LB<T,W> { public: ALL_Unstructured_LB() {} - ALL_Unstructured_LB(int d, W w, T g) : dimension(d), - work(w), - gamma(g) + ALL_Unstructured_LB(int d, W w, T g) : + ALL_LB<T,W>(d, g) { + this->set_work(w); // need the lower and upper bounds // of the domain for the tensor-based // load-balancing scheme - n_vertices = (int)std::pow(2,dimension); - vertices = new std::vector<ALL_Point<T>>(n_vertices); - vertex_rank = new int[n_vertices]; - shifted_vertices = new std::vector<ALL_Point<T>>(n_vertices); - // global dimension of the system in each coordinate - global_dims = new int[dimension]; - // coordinates of the local domain in the system - local_coords = new int[dimension]; - // periodicity of the MPI communicator / system - periodicity = new int[dimension]; - - // array of MPI communicators for each vertex (2 in 1D, 4 in 2D, 8 in 3D) - communicators = new MPI_Comm[n_vertices]; + vertex_rank.resize(8); } - ~ALL_Unstructured_LB(); - - void set_vertices(T*); - void set_vertices(std::vector<ALL_Point<T>>&); + ~ALL_Unstructured_LB() override; // setup communicators for the exchange of work in each domain - void setup(MPI_Comm); + void setup() override; // do a load-balancing step - virtual void balance(); + void balance(int step) override; // getter for variables (passed by reference to avoid // direct access to private members of the object) - // getter for base vertices - void get_vertices(T*); - // getter for shifted vertices - void get_shifted_vertices(T*); - // getter for shifted vertices - void get_shifted_vertices(std::vector<ALL_Point<T>>&); - // neighbors // provide list of neighbors - virtual void get_neighbors(std::vector<int>&); + void get_neighbors(std::vector<int>&) override; // provide list of neighbors in each direction - virtual void get_neighbors(int**); - + void get_neighbors(int**) override; + // empty override for setting method specific data + virtual void set_data(void*) override {} + private: - // dimension of the system - int dimension; - - // number of vertices (based on dimension) - int n_vertices; - - // shift cooeficient - T gamma; - - // work per domain - W work; - - // vertices - std::vector<ALL_Point<T>>* vertices; - - // vertices after load-balancing shift - std::vector<ALL_Point<T>>* shifted_vertices; - // MPI values - // global communicator - MPI_Comm global_comm; - // rank of the local domain in the communicator - int local_rank; + // rank(s) in the communicators of the vertices - int* vertex_rank; - // global dimension of the system in each coordinate - int* global_dims; - // coordinates of the local domain in the system - int* local_coords; - // periodicity of the MPI communicator / system - int* periodicity; - + std::vector<int> vertex_rank; + // type for MPI communication MPI_Datatype mpi_data_type_T; MPI_Datatype mpi_data_type_W; // array of MPI communicators for each direction (to collect work // on each plane) - MPI_Comm* communicators; + std::vector<MPI_Comm> communicators; // list of neighbors std::vector<int> neighbors; @@ -171,60 +125,13 @@ template <class T, class W> class ALL_Unstructured_LB // find neighbors (more sophisticated than for tensor-product // variant of LB) void find_neighbors(); + + // number of vertices (since it is 
not equal for all domains) + int n_vertices; }; template <class T, class W> ALL_Unstructured_LB<T,W>::~ALL_Unstructured_LB() { - if (vertices) delete vertices; - if (vertex_rank) delete vertex_rank; - if (shifted_vertices) delete shifted_vertices; - if (global_dims) delete global_dims; - if (local_coords) delete local_coords; - if (periodicity) delete periodicity; - if (communicators) delete communicators; -} - -template <class T, class W> void ALL_Unstructured_LB<T,W>::get_vertices(T* result) -{ - for (int i = 0; i < vertices->size()*dimension; ++i) - { - result[i] = vertices->at(i); - } -} - -template <class T, class W> void ALL_Unstructured_LB<T,W>::get_shifted_vertices(T* result) -{ - for (int i = 0; i < vertices->size(); ++i) - { - for (int j = 0; j < dimension; ++j) - { - result[i*dimension+j] = shifted_vertices->at(i).x(j); - } - } -} - -template <class T, class W> void ALL_Unstructured_LB<T,W>::get_shifted_vertices(std::vector<ALL_Point<T>>& result) -{ - for (int i = 0; i < shifted_vertices->size(); ++i) - { - result.at(i) = shifted_vertices->at(i); - } -} - -// set the actual vertices (unsafe due to array copy) -template <class T, class W> void ALL_Unstructured_LB<T,W>::set_vertices(T* v) -{ - for (int i = 0; i < vertices->size()*dimension; ++i) - { - vertices->at(i) = v[i]; - } -} - -// set the actual vertices -template <class T, class W> void ALL_Unstructured_LB<T,W>::set_vertices(std::vector<ALL_Point<T>>& v) -{ - // TODO: check if size(v) = 2^dim - *vertices = v; } // provide list of neighbors @@ -238,10 +145,11 @@ template<class T, class W> void ALL_Unstructured_LB<T,W>::get_neighbors(int** re *ret = neighbors.data(); } -template <class T, class W> void ALL_Unstructured_LB<T,W>::setup(MPI_Comm comm) +template <class T, class W> void ALL_Unstructured_LB<T,W>::setup() { - global_comm = comm; - + n_vertices = this->vertices.size(); + communicators = std::vector<MPI_Comm>(n_vertices); + // determine correct MPI data type for template T if (std::is_same<T,double>::value) mpi_data_type_T = MPI_DOUBLE; else if (std::is_same<T,float>::value) mpi_data_type_T = MPI_FLOAT; @@ -271,11 +179,11 @@ template <class T, class W> void ALL_Unstructured_LB<T,W>::setup(MPI_Comm comm) } MPI_Group all; - MPI_Comm_group(global_comm,&all); + MPI_Comm_group(this->global_comm,&all); // check if communicator is cartesian int status; - MPI_Topo_test(global_comm, &status); + MPI_Topo_test(this->global_comm, &status); if (status != MPI_CART) { @@ -286,11 +194,11 @@ template <class T, class W> void ALL_Unstructured_LB<T,W>::setup(MPI_Comm comm) "Cartesian MPI communicator required, passed communicator is not cartesian"); } - // get the local coordinates, periodicity and global size from the MPI communicator - MPI_Cart_get(global_comm, dimension, global_dims, periodicity, local_coords); + // get the local coordinates, this->periodicity and global size from the MPI communicator + MPI_Cart_get(this->global_comm, this->dimension, this->global_dims.data(), this->periodicity.data(), this->local_coords.data()); // get the local rank from the MPI communicator - MPI_Cart_rank(global_comm, local_coords, &local_rank); + MPI_Cart_rank(this->global_comm, this->local_coords.data(), &this->local_rank); // groups required for new communicators MPI_Group groups[n_vertices]; @@ -298,7 +206,7 @@ template <class T, class W> void ALL_Unstructured_LB<T,W>::setup(MPI_Comm comm) int processes[n_vertices][n_vertices]; // shifted local coordinates to find neighboring processes - int shifted_coords[dimension]; + int 
shifted_coords[this->dimension]; /* @@ -313,16 +221,16 @@ template <class T, class W> void ALL_Unstructured_LB<T,W>::setup(MPI_Comm comm) for (int x = 0; x < 2; ++x) { // group / communicator for vertex 0 - shifted_coords[0] = local_coords[0] - 1 + x; + shifted_coords[0] = this->local_coords.at(0) - 1 + x; - MPI_Cart_rank(global_comm, shifted_coords, &(processes[x][0])); + MPI_Cart_rank(this->global_comm, shifted_coords, &(processes[x][0])); - shifted_coords[0] = local_coords[0] + x; + shifted_coords[0] = this->local_coords.at(0) + x; - MPI_Cart_rank(global_comm, shifted_coords, &(processes[x][1])); + MPI_Cart_rank(this->global_comm, shifted_coords, &(processes[x][1])); MPI_Group_incl(all, 2, &processes[x][0], &groups[x]); - MPI_Comm_create(global_comm, groups[x], &communicators[x]); + MPI_Comm_create(this->global_comm, groups[x], &communicators[x]); // get local rank in vertex communicator MPI_Comm_rank(communicators[x],&vertex_rank[x]); @@ -343,28 +251,28 @@ template <class T, class W> void ALL_Unstructured_LB<T,W>::setup(MPI_Comm comm) { int vertex = 2 * y + x; - shifted_coords[0] = local_coords[0] - 1 + x; - shifted_coords[1] = local_coords[1] - 1 + y; + shifted_coords[0] = this->local_coords.at(0) - 1 + x; + shifted_coords[1] = this->local_coords.at(1) - 1 + y; - MPI_Cart_rank(global_comm, shifted_coords, &(processes[vertex][0])); + MPI_Cart_rank(this->global_comm, shifted_coords, &(processes[vertex][0])); - shifted_coords[0] = local_coords[0] + x; - shifted_coords[1] = local_coords[1] - 1 + y; + shifted_coords[0] = this->local_coords.at(0) + x; + shifted_coords[1] = this->local_coords.at(1) - 1 + y; - MPI_Cart_rank(global_comm, shifted_coords, &(processes[vertex][1])); + MPI_Cart_rank(this->global_comm, shifted_coords, &(processes[vertex][1])); - shifted_coords[0] = local_coords[0] - 1 + x; - shifted_coords[1] = local_coords[1] + y; + shifted_coords[0] = this->local_coords.at(0) - 1 + x; + shifted_coords[1] = this->local_coords.at(1) + y; - MPI_Cart_rank(global_comm, shifted_coords, &(processes[vertex][2])); + MPI_Cart_rank(this->global_comm, shifted_coords, &(processes[vertex][2])); - shifted_coords[0] = local_coords[0] + x; - shifted_coords[1] = local_coords[1] + y; + shifted_coords[0] = this->local_coords.at(0) + x; + shifted_coords[1] = this->local_coords.at(1) + y; - MPI_Cart_rank(global_comm, shifted_coords, &(processes[vertex][3])); + MPI_Cart_rank(this->global_comm, shifted_coords, &(processes[vertex][3])); MPI_Group_incl(all, 4, &processes[vertex][0], &groups[vertex]); - MPI_Comm_create(global_comm, groups[vertex], &communicators[vertex]); + MPI_Comm_create(this->global_comm, groups[vertex], &communicators[vertex]); // get local rank in vertex communicator MPI_Comm_rank(communicators[vertex],&vertex_rank[vertex]); @@ -392,56 +300,56 @@ template <class T, class W> void ALL_Unstructured_LB<T,W>::setup(MPI_Comm comm) int vertex = 4 * z + 2 * y + x; // group / communicator for vertex 0,2,4,6 - shifted_coords[0] = local_coords[0] - 1 + x; - shifted_coords[1] = local_coords[1] - 1 + y; - shifted_coords[2] = local_coords[2] - 1 + z; + shifted_coords[0] = this->local_coords.at(0) - 1 + x; + shifted_coords[1] = this->local_coords.at(1) - 1 + y; + shifted_coords[2] = this->local_coords.at(2) - 1 + z; - MPI_Cart_rank(global_comm, shifted_coords, &(processes[vertex][0])); + MPI_Cart_rank(this->global_comm, shifted_coords, &(processes[vertex][0])); - shifted_coords[0] = local_coords[0] + x; - shifted_coords[1] = local_coords[1] - 1 + y; - shifted_coords[2] = local_coords[2] - 1 + z; + 
shifted_coords[0] = this->local_coords.at(0) + x; + shifted_coords[1] = this->local_coords.at(1) - 1 + y; + shifted_coords[2] = this->local_coords.at(2) - 1 + z; - MPI_Cart_rank(global_comm, shifted_coords, &(processes[vertex][1])); + MPI_Cart_rank(this->global_comm, shifted_coords, &(processes[vertex][1])); - shifted_coords[0] = local_coords[0] - 1 + x; - shifted_coords[1] = local_coords[1] + y; - shifted_coords[2] = local_coords[2] - 1 + z; + shifted_coords[0] = this->local_coords.at(0) - 1 + x; + shifted_coords[1] = this->local_coords.at(1) + y; + shifted_coords[2] = this->local_coords.at(2) - 1 + z; - MPI_Cart_rank(global_comm, shifted_coords, &(processes[vertex][2])); + MPI_Cart_rank(this->global_comm, shifted_coords, &(processes[vertex][2])); - shifted_coords[0] = local_coords[0] + x; - shifted_coords[1] = local_coords[1] + y; - shifted_coords[2] = local_coords[2] - 1 + z; + shifted_coords[0] = this->local_coords.at(0) + x; + shifted_coords[1] = this->local_coords.at(1) + y; + shifted_coords[2] = this->local_coords.at(2) - 1 + z; - MPI_Cart_rank(global_comm, shifted_coords, &(processes[vertex][3])); + MPI_Cart_rank(this->global_comm, shifted_coords, &(processes[vertex][3])); - shifted_coords[0] = local_coords[0] - 1 + x; - shifted_coords[1] = local_coords[1] - 1 + y; - shifted_coords[2] = local_coords[2] + z; + shifted_coords[0] = this->local_coords.at(0) - 1 + x; + shifted_coords[1] = this->local_coords.at(1) - 1 + y; + shifted_coords[2] = this->local_coords.at(2) + z; - MPI_Cart_rank(global_comm, shifted_coords, &(processes[vertex][4])); + MPI_Cart_rank(this->global_comm, shifted_coords, &(processes[vertex][4])); - shifted_coords[0] = local_coords[0] + x; - shifted_coords[1] = local_coords[1] - 1 + y; - shifted_coords[2] = local_coords[2] + z; + shifted_coords[0] = this->local_coords.at(0) + x; + shifted_coords[1] = this->local_coords.at(1) - 1 + y; + shifted_coords[2] = this->local_coords.at(2) + z; - MPI_Cart_rank(global_comm, shifted_coords, &(processes[vertex][5])); + MPI_Cart_rank(this->global_comm, shifted_coords, &(processes[vertex][5])); - shifted_coords[0] = local_coords[0] - 1 + x; - shifted_coords[1] = local_coords[1] + y; - shifted_coords[2] = local_coords[2] + z; + shifted_coords[0] = this->local_coords.at(0) - 1 + x; + shifted_coords[1] = this->local_coords.at(1) + y; + shifted_coords[2] = this->local_coords.at(2) + z; - MPI_Cart_rank(global_comm, shifted_coords, &(processes[vertex][6])); + MPI_Cart_rank(this->global_comm, shifted_coords, &(processes[vertex][6])); - shifted_coords[0] = local_coords[0] + x; - shifted_coords[1] = local_coords[1] + y; - shifted_coords[2] = local_coords[2] + z; + shifted_coords[0] = this->local_coords.at(0) + x; + shifted_coords[1] = this->local_coords.at(1) + y; + shifted_coords[2] = this->local_coords.at(2) + z; - MPI_Cart_rank(global_comm, shifted_coords, &(processes[vertex][7])); + MPI_Cart_rank(this->global_comm, shifted_coords, &(processes[vertex][7])); MPI_Group_incl(all, 8, &processes[vertex][0], &groups[vertex]); - MPI_Comm_create(global_comm, groups[vertex], &communicators[vertex]); + MPI_Comm_create(this->global_comm, groups[vertex], &communicators[vertex]); // get local rank in vertex communicator MPI_Comm_rank(communicators[vertex],&vertex_rank[vertex]); @@ -450,7 +358,7 @@ template <class T, class W> void ALL_Unstructured_LB<T,W>::setup(MPI_Comm comm) } for (int i = 0; i < 8; ++i) { - if (i == local_rank) + if (i == this->local_rank) { std::cout << "local rank: " << i << " " << std::endl; for (int k = 0; k < 8; ++k) @@ 
-477,56 +385,66 @@ template <class T, class W> void ALL_Unstructured_LB<T,W>::setup(MPI_Comm comm) } */ - int dim_vert[dimension]; - for (int d = 0; d < dimension; ++d) - { - dim_vert[d] = global_dims[d]; - } +#ifdef ALL_DEBUG_ENABLED + MPI_Barrier(this->global_comm); + if (this->local_rank == 0) std::cout << "ALL_Unstructured<T,W>::setup() preparing communicators..." << std::endl; +#endif + std::vector<int> dim_vert(this->global_dims); MPI_Comm tmp_comm; int own_vertex; for (int i = 0; i < n_vertices; ++i) - communicators[i] = MPI_COMM_SELF; - - for (int iz = 0; iz < dim_vert[2]; ++iz) + communicators.at(i) = MPI_COMM_SELF; + +#ifdef ALL_DEBUG_ENABLED + MPI_Barrier(this->global_comm); + if (this->local_rank == 0) std::cout << "ALL_Unstructured<T,W>::setup() computing communicators..." << std::endl; + std::cout << "DEBUG: " << " rank: " << this->local_rank + << " dim_vert: " << dim_vert.at(0) << " " << dim_vert.at(1) << " " << dim_vert.at(2) + << " size(local_coords): " << this->local_coords.size() << " " + << " size(global_dims): " << this->global_dims.size() + << " size(vertex_rank): " << vertex_rank.size() + << std::endl; +#endif + for (int iz = 0; iz < dim_vert.at(2); ++iz) { - for (int iy = 0; iy < dim_vert[1]; ++iy) + for (int iy = 0; iy < dim_vert.at(1); ++iy) { - for (int ix = 0; ix < dim_vert[0]; ++ix) + for (int ix = 0; ix < dim_vert.at(0); ++ix) { - if (ix == ( ( local_coords[0] + 0 ) % global_dims[0]) && - iy == ( ( local_coords[1] + 0 ) % global_dims[1]) && - iz == ( ( local_coords[2] + 0 ) % global_dims[2]) ) + if (ix == ( ( this->local_coords.at(0) + 0 ) % this->global_dims.at(0)) && + iy == ( ( this->local_coords.at(1) + 0 ) % this->global_dims.at(1)) && + iz == ( ( this->local_coords.at(2) + 0 ) % this->global_dims.at(2)) ) own_vertex = 0; - else if (ix == ( ( local_coords[0] + 1 ) % global_dims[0]) && - iy == ( ( local_coords[1] + 0 ) % global_dims[1]) && - iz == ( ( local_coords[2] + 0 ) % global_dims[2]) ) + else if (ix == ( ( this->local_coords.at(0) + 1 ) % this->global_dims.at(0)) && + iy == ( ( this->local_coords.at(1) + 0 ) % this->global_dims.at(1)) && + iz == ( ( this->local_coords.at(2) + 0 ) % this->global_dims.at(2)) ) own_vertex = 1; - else if (ix == ( ( local_coords[0] + 0 ) % global_dims[0]) && - iy == ( ( local_coords[1] + 1 ) % global_dims[1]) && - iz == ( ( local_coords[2] + 0 ) % global_dims[2]) ) + else if (ix == ( ( this->local_coords.at(0) + 0 ) % this->global_dims.at(0)) && + iy == ( ( this->local_coords.at(1) + 1 ) % this->global_dims.at(1)) && + iz == ( ( this->local_coords.at(2) + 0 ) % this->global_dims.at(2)) ) own_vertex = 2; - else if (ix == ( ( local_coords[0] + 1 ) % global_dims[0]) && - iy == ( ( local_coords[1] + 1 ) % global_dims[1]) && - iz == ( ( local_coords[2] + 0 ) % global_dims[2]) ) + else if (ix == ( ( this->local_coords.at(0) + 1 ) % this->global_dims.at(0)) && + iy == ( ( this->local_coords.at(1) + 1 ) % this->global_dims.at(1)) && + iz == ( ( this->local_coords.at(2) + 0 ) % this->global_dims.at(2)) ) own_vertex = 3; - else if (ix == ( ( local_coords[0] + 0 ) % global_dims[0]) && - iy == ( ( local_coords[1] + 0 ) % global_dims[1]) && - iz == ( ( local_coords[2] + 1 ) % global_dims[2]) ) + else if (ix == ( ( this->local_coords.at(0) + 0 ) % this->global_dims.at(0)) && + iy == ( ( this->local_coords.at(1) + 0 ) % this->global_dims.at(1)) && + iz == ( ( this->local_coords.at(2) + 1 ) % this->global_dims.at(2)) ) own_vertex = 4; - else if (ix == ( ( local_coords[0] + 1 ) % global_dims[0]) && - iy == ( ( local_coords[1] + 0 ) % 
global_dims[1]) && - iz == ( ( local_coords[2] + 1 ) % global_dims[2]) ) + else if (ix == ( ( this->local_coords.at(0) + 1 ) % this->global_dims.at(0)) && + iy == ( ( this->local_coords.at(1) + 0 ) % this->global_dims.at(1)) && + iz == ( ( this->local_coords.at(2) + 1 ) % this->global_dims.at(2)) ) own_vertex = 5; - else if (ix == ( ( local_coords[0] + 0 ) % global_dims[0]) && - iy == ( ( local_coords[1] + 1 ) % global_dims[1]) && - iz == ( ( local_coords[2] + 1 ) % global_dims[2]) ) + else if (ix == ( ( this->local_coords.at(0) + 0 ) % this->global_dims.at(0)) && + iy == ( ( this->local_coords.at(1) + 1 ) % this->global_dims.at(1)) && + iz == ( ( this->local_coords.at(2) + 1 ) % this->global_dims.at(2)) ) own_vertex = 6; - else if (ix == ( ( local_coords[0] + 1 ) % global_dims[0]) && - iy == ( ( local_coords[1] + 1 ) % global_dims[1]) && - iz == ( ( local_coords[2] + 1 ) % global_dims[2]) ) + else if (ix == ( ( this->local_coords.at(0) + 1 ) % this->global_dims.at(0)) && + iy == ( ( this->local_coords.at(1) + 1 ) % this->global_dims.at(1)) && + iz == ( ( this->local_coords.at(2) + 1 ) % this->global_dims.at(2)) ) own_vertex = 7; else own_vertex = -1; @@ -535,63 +453,67 @@ template <class T, class W> void ALL_Unstructured_LB<T,W>::setup(MPI_Comm comm) int result; if (own_vertex >= 0) { - vertex_rank[own_vertex] = 7 - own_vertex; - result = MPI_Comm_split(global_comm,1,vertex_rank[own_vertex],&communicators[own_vertex]); - MPI_Comm_rank(communicators[own_vertex],&vertex_rank[own_vertex]); + vertex_rank.at(own_vertex) = 7 - own_vertex; + result = MPI_Comm_split(this->global_comm,1,vertex_rank.at(own_vertex),&communicators.at(own_vertex)); + MPI_Comm_rank(communicators.at(own_vertex),&vertex_rank.at(own_vertex)); } else { - result = MPI_Comm_split(global_comm,0,0,&tmp_comm); + result = MPI_Comm_split(this->global_comm,0,0,&tmp_comm); } if (result != MPI_SUCCESS) { - std::cout << "RANK: " << local_rank << " ERROR in MPI_Comm_Split" << std::endl; + std::cout << "RANK: " << this->local_rank << " ERROR in MPI_Comm_Split" << std::endl; } } } +#ifdef ALL_DEBUG_ENABLED + MPI_Barrier(this->global_comm); + if (this->local_rank == 0) std::cout << "ALL_Unstructured<T,W>::setup() finished computing communicators..." 
<< std::endl; +#endif } for (int vertex = 0; vertex < n_vertices; ++vertex) - MPI_Comm_rank(communicators[vertex],&vertex_rank[vertex]); + MPI_Comm_rank(communicators.at(vertex),&vertex_rank.at(vertex)); } // TODO: periodic boundary conditions (would require size of the system) // for now: do not change the vertices along the edge of the system -template <class T, class W> void ALL_Unstructured_LB<T,W>::balance() +template <class T, class W> void ALL_Unstructured_LB<T,W>::balance(int /*step*/) { // geometrical center and work of each neighbor for each vertex // work is cast to vertex data type, since in the end a shift // of the vertex is computed - T vertex_info[n_vertices*n_vertices*(dimension+2)]; - T center[dimension]; + T vertex_info[n_vertices*n_vertices*(this->dimension+2)]; + T center[this->dimension]; // local geometric center - T local_info[(dimension+2) * n_vertices]; + T local_info[(this->dimension+2) * n_vertices]; - for (int i = 0; i < n_vertices*n_vertices*(dimension+2); ++i) + for (int i = 0; i < n_vertices*n_vertices*(this->dimension+2); ++i) vertex_info[i] = -1; - for (int d = 0; d < (dimension+2) * n_vertices; ++d) + for (int d = 0; d < (this->dimension+2) * n_vertices; ++d) local_info[d] = (T)0; - for (int d = 0; d < dimension; ++d) + for (int d = 0; d < this->dimension; ++d) center[d] = (T)0; // compute geometric center for (int v = 0; v < n_vertices; ++v) { - for (int d = 0; d < dimension; ++d) + for (int d = 0; d < this->dimension; ++d) { - center[d] += ( (vertices->at(v)).x(d) / (T)n_vertices ); + center[d] += ( (this->vertices.at(v)).x(d) / (T)n_vertices ); } - local_info[v * (dimension+2) + dimension] = (T)work; + local_info[v * (this->dimension+2) + this->dimension] = (T)this->work.at(0); } for (int v = 0; v < n_vertices; ++v) { - for (int d = 0; d < dimension; ++d) + for (int d = 0; d < this->dimension; ++d) { - local_info[v * (dimension+2) + d] = center[d] - (vertices->at(v)).x(d); + local_info[v * (this->dimension+2) + d] = center[d] - (this->vertices.at(v)).x(d); } } @@ -665,32 +587,32 @@ template <class T, class W> void ALL_Unstructured_LB<T,W>::balance() // compute for each vertex the maximum movement towards the center - local_info[ 0 * (dimension+2) + dimension+1 ] = - distance( vertices, 0, 1, 2, 4, local_rank); + local_info[ 0 * (this->dimension+2) + this->dimension+1 ] = + distance( &(this->vertices), 0, 1, 2, 4, this->local_rank); - local_info[ 1 * (dimension+2) + dimension+1 ] = - distance( vertices, 1, 0, 3, 5, local_rank); + local_info[ 1 * (this->dimension+2) + this->dimension+1 ] = + distance( &(this->vertices), 1, 0, 3, 5, this->local_rank); - local_info[ 2 * (dimension+2) + dimension+1 ] = - distance( vertices, 2, 3, 0, 6, local_rank); + local_info[ 2 * (this->dimension+2) + this->dimension+1 ] = + distance( &(this->vertices), 2, 3, 0, 6, this->local_rank); - local_info[ 3 * (dimension+2) + dimension+1 ] = - distance( vertices, 3, 2, 1, 7, local_rank); + local_info[ 3 * (this->dimension+2) + this->dimension+1 ] = + distance( &(this->vertices), 3, 2, 1, 7, this->local_rank); - local_info[ 4 * (dimension+2) + dimension+1 ] = - distance( vertices, 4, 5, 6, 0, local_rank); + local_info[ 4 * (this->dimension+2) + this->dimension+1 ] = + distance( &(this->vertices), 4, 5, 6, 0, this->local_rank); - local_info[ 5 * (dimension+2) + dimension+1 ] = - distance( vertices, 5, 4, 7, 1, local_rank); + local_info[ 5 * (this->dimension+2) + this->dimension+1 ] = + distance( &(this->vertices), 5, 4, 7, 1, this->local_rank); - local_info[ 6 * (dimension+2) + 
dimension+1 ] = - distance( vertices, 6, 7, 4, 2, local_rank); + local_info[ 6 * (this->dimension+2) + this->dimension+1 ] = + distance( &(this->vertices), 6, 7, 4, 2, this->local_rank); - local_info[ 7 * (dimension+2) + dimension+1 ] = - distance( vertices, 7, 6, 5, 3, local_rank); + local_info[ 7 * (this->dimension+2) + this->dimension+1 ] = + distance( &(this->vertices), 7, 6, 5, 3, this->local_rank); /* - if (local_rank == 4) + if (this->local_rank == 4) { std::cout << "max dists: " << local_info[ 0 * (dimension+2) + dimension+1 ] << " " @@ -712,45 +634,45 @@ template <class T, class W> void ALL_Unstructured_LB<T,W>::balance() for (int v = 0; v < n_vertices; ++v) { - MPI_Iallgather(&local_info[v*(dimension+2)],dimension+2,mpi_data_type_T, - &vertex_info[v*n_vertices*(dimension+2)],dimension+2,mpi_data_type_T, + MPI_Iallgather(&local_info[v*(this->dimension+2)],this->dimension+2,mpi_data_type_T, + &vertex_info[v*n_vertices*(this->dimension+2)],this->dimension+2,mpi_data_type_T, communicators[v],&request[v]); } MPI_Waitall(n_vertices,request,status); // compute new position for vertex 7 (if not periodic) T total_work = (T)0; - T shift_vectors[dimension * n_vertices]; + T shift_vectors[this->dimension * n_vertices]; for (int v = 0; v < n_vertices; ++v) { - for (int d = 0; d < dimension; ++d) + for (int d = 0; d < this->dimension; ++d) { - shift_vectors[v * dimension + d] = (T)0; + shift_vectors[v * this->dimension + d] = (T)0; } } // get total work for (int v = 0; v < n_vertices; ++v) { - if (vertex_info[(n_vertices-1)*n_vertices+v*(dimension+2)+dimension] > 1e-6) - total_work += vertex_info[(n_vertices-1)*n_vertices+v*(dimension+2)+dimension]; + if (vertex_info[(n_vertices-1)*n_vertices+v*(this->dimension+2)+this->dimension] > 1e-6) + total_work += vertex_info[(n_vertices-1)*n_vertices+v*(this->dimension+2)+this->dimension]; } // determine shortest distance between vertex and a neighbor T cdist = std::max( 1e-6, - vertex_info[(n_vertices-1)*(n_vertices*(dimension+2))+0*(dimension+2)+dimension+1] + vertex_info[(n_vertices-1)*(n_vertices*(this->dimension+2))+0*(this->dimension+2)+this->dimension+1] ); for (int v = 1; v < n_vertices; ++v) { - if ( vertex_info[(n_vertices-1)*(n_vertices*(dimension+2))+v*(dimension+2)+dimension+1] > 0 ) + if ( vertex_info[(n_vertices-1)*(n_vertices*(this->dimension+2))+v*(this->dimension+2)+this->dimension+1] > 0 ) { cdist = std::min ( cdist, - vertex_info[(n_vertices-1)*(n_vertices*(dimension+2))+v*(dimension+2)+dimension+1] + vertex_info[(n_vertices-1)*(n_vertices*(this->dimension+2))+v*(this->dimension+2)+this->dimension+1] ); } } @@ -762,44 +684,44 @@ template <class T, class W> void ALL_Unstructured_LB<T,W>::balance() { for (int i = 0; i < n_vertices; ++i) { - for (int d = 0; d < dimension; ++d) + for (int d = 0; d < this->dimension; ++d) { - if (local_coords[d] != global_dims[d] - 1) + if (this->local_coords.at(d) != this->global_dims.at(d) - 1) { //if ( vertex_info[(n_vertices-1)*(n_vertices*(dimension+2))+i*(dimension+2)+dimension] - ( total_work / (T)n_vertices ) > 0) //{ - shift_vectors[(n_vertices-1)*dimension + d] += - ( vertex_info[(n_vertices-1)*(n_vertices*(dimension+2))+i*(dimension+2)+dimension] - ( total_work / (T)n_vertices ) ) * - vertex_info[(n_vertices-1)*(n_vertices*(dimension+2))+i*(dimension+2)+d] - / ( (T)2.0 * gamma * total_work * (T)n_vertices); + shift_vectors[(n_vertices-1)*this->dimension + d] += + ( vertex_info[(n_vertices-1)*(n_vertices*(this->dimension+2))+i*(this->dimension+2)+this->dimension] - ( total_work / (T)n_vertices 
) ) * + vertex_info[(n_vertices-1)*(n_vertices*(this->dimension+2))+i*(this->dimension+2)+d] + / ( (T)2.0 * this->gamma * total_work * (T)n_vertices); //} } } } // distance of shift T shift = (T)0; - for (int d = 0; d < dimension; ++d) - shift += shift_vectors[(n_vertices-1)*dimension + d] * shift_vectors[(n_vertices-1)*dimension + d]; + for (int d = 0; d < this->dimension; ++d) + shift += shift_vectors[(n_vertices-1)*this->dimension + d] * shift_vectors[(n_vertices-1)*this->dimension + d]; shift = sqrt(shift); if (std::abs(shift) >= 1e-6 && shift > cdist) { // normalize length of shift vector to half the distance to the nearest center - for (int d = 0; d < dimension; ++d) - shift_vectors[(n_vertices-1)*dimension + d] *= cdist / shift; + for (int d = 0; d < this->dimension; ++d) + shift_vectors[(n_vertices-1)*this->dimension + d] *= cdist / shift; } } for (int i = 0; i < n_vertices; ++i) { - MPI_Ibcast(&shift_vectors[i*dimension],dimension,mpi_data_type_T,0,communicators[i],&request[i]); + MPI_Ibcast(&shift_vectors[i*this->dimension],this->dimension,mpi_data_type_T,0,communicators[i],&request[i]); } MPI_Waitall(n_vertices,request,status); - shifted_vertices = vertices; + this->shifted_vertices = this->vertices; /* - if (local_rank == 7) + if (this->local_rank == 7) { std::cout << "center: "; for (int d = 0; d < dimension; ++d) @@ -811,18 +733,18 @@ template <class T, class W> void ALL_Unstructured_LB<T,W>::balance() */ for (int v = 0; v < n_vertices; ++v) { - //if (local_rank == 4) std::cout << "vertex " << v << ": "; - for (int d = 0; d < dimension; ++d) + //if (this->local_rank == 4) std::cout << "vertex " << v << ": "; + for (int d = 0; d < this->dimension; ++d) { - shifted_vertices->at(v).set_coordinate(d, vertices->at(v).x(d) + shift_vectors[v * dimension + d]); - if (shifted_vertices->at(v).x(d) != shifted_vertices->at(v).x(d)) + this->shifted_vertices.at(v).set_coordinate(d, this->vertices.at(v).x(d) + shift_vectors[v * this->dimension + d]); + if (this->shifted_vertices.at(v).x(d) != this->shifted_vertices.at(v).x(d)) { - std::cout << local_rank << " " << v << " " << d << std::endl; + std::cout << this->local_rank << " " << v << " " << d << std::endl; MPI_Abort(MPI_COMM_WORLD,-200); } - //if (local_rank == 4) std::cout << shifted_vertices->at(v).x(d) << " " << shift_vectors[v * dimension + d] << " "; + //if (this->local_rank == 4) std::cout << shifted_vertices->at(v).x(d) << " " << shift_vectors[v * dimension + d] << " "; } - //if (local_rank == 4) std::cout << std::endl; + //if (this->local_rank == 4) std::cout << std::endl; } } diff --git a/include/ALL_Voronoi.hpp b/include/ALL_Voronoi.hpp index f79e2c3c7cfb51f1abc8e553dc7399e8256055d9..6c4b742e6870577c4c501ad94cf168856dcf4acf 100644 --- a/include/ALL_Voronoi.hpp +++ b/include/ALL_Voronoi.hpp @@ -48,90 +48,50 @@ POSSIBILITY OF SUCH DAMAGE. 
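
Before the Voronoi scheme below, a remark on the unstructured balance above: every shared vertex is pulled toward the work-weighted centers of the domains meeting at it, and the step is capped so neighboring domains cannot fold over each other. A compact sketch of that update rule under those assumptions (the gather over the per-vertex communicators is omitted; all names are illustrative):

```cpp
#include <array>
#include <cmath>
#include <iostream>
#include <vector>

// One vertex, several domains sharing it: each domain contributes its work and
// the vector from the vertex to its geometric center.
struct Contribution {
    std::array<double, 3> to_center;
    double work;
};

// shift = sum_i (W_i - W_avg) * to_center_i / (2 * gamma * W_total * n),
// with the step length capped at max_dist.
std::array<double, 3> vertex_shift(const std::vector<Contribution>& c,
                                   double gamma, double max_dist)
{
    std::array<double, 3> shift{0.0, 0.0, 0.0};
    if (c.empty()) return shift;

    const double n = static_cast<double>(c.size());
    double total_work = 0.0;
    for (const auto& d : c) total_work += d.work;
    if (total_work <= 0.0) return shift;

    for (const auto& d : c)
        for (int k = 0; k < 3; ++k)
            shift[k] += (d.work - total_work / n) * d.to_center[k]
                      / (2.0 * gamma * total_work * n);

    double len = std::sqrt(shift[0] * shift[0] + shift[1] * shift[1] + shift[2] * shift[2]);
    if (len > max_dist)
        for (int k = 0; k < 3; ++k) shift[k] *= max_dist / len;
    return shift;
}

int main()
{
    // two light domains on the left, two heavy ones on the right:
    // the shared vertex drifts toward the heavier side (positive x)
    std::vector<Contribution> c = {{{-1.0, 0.0, 0.0}, 1.0}, {{-1.0, 0.0, 0.0}, 1.0},
                                   {{ 1.0, 0.0, 0.0}, 3.0}, {{ 1.0, 0.0, 0.0}, 3.0}};
    auto s = vertex_shift(c, 2.0, 0.5);
    std::cout << s[0] << " " << s[1] << " " << s[2] << std::endl;
    return 0;
}
```
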
#include <algorithm> #include "ALL_CustomExceptions.hpp" #include "ALL_Point.hpp" +#include "ALL_LB.hpp" #include "voro++.hh" // T: data type of vertices used in the balancer // W: data type of the work used in the balancer -template <class T, class W> class ALL_Voronoi_LB +template <class T, class W> class ALL_Voronoi_LB : public ALL_LB<T,W> { public: ALL_Voronoi_LB() {} - ALL_Voronoi_LB(int d, W w, T g) : dimension(d), - work(w), - gamma(g) + ALL_Voronoi_LB(int d, W w, T g) : + ALL_LB<T,W>(d,g) { - // only need the generator point - vertices = new T[2*dimension]; - shifted_vertices = new T[dimension]; - // set size for system size array - sys_size.resize(6); - - loadbalancing_step = 0; + this->set_work(w); } - ~ALL_Voronoi_LB(); - - void set_vertices(T*); - void set_vertices(std::vector<ALL_Point<T>>&); + ~ALL_Voronoi_LB() override; voro::voronoicell vc; // setup communicators for the exchange of work in each domain - void setup(MPI_Comm); + void setup() override; // do a load-balancing step - virtual void balance(); + virtual void balance(int step) override; // getter for variables (passed by reference to avoid // direct access to private members of the object) - // getter for base vertices - void get_vertices(T*); - // getter for shifted vertices - void get_shifted_vertices(T*); - // getter for shifted vertices - void get_shifted_vertices(std::vector<T>&); - // getter for shifted vertices - void get_shifted_vertices(std::vector<ALL_Point<T>>&); - // neighbors // provide list of neighbors - virtual void get_neighbors(std::vector<int>&); + void get_neighbors(std::vector<int>&) override; // provide list of neighbors in each direction - virtual void get_neighbors(int**); + void get_neighbors(int**) override; // return generator points of the neighboring cells (to be able to recreate // local and neighboring cells) - virtual void get_neighbor_vertices(std::vector<T>&); - - // set system size for periodic corrections - void set_sys_size(std::vector<T>&); - - void set_step(int s) { loadbalancing_step = s; } + void get_neighbor_vertices(std::vector<T>&); + // empty override for setting method specific data + virtual void set_data(void*) override {} + private: - // dimension of the system - int dimension; - - // shift cooeficient - T gamma; - - // work per domain - W work; - - // vertices - T* vertices; - - // vertices after load-balancing shift - T* shifted_vertices; - // MPI values - // global communicator - MPI_Comm global_comm; - // rank of the local domain in the communicator - int local_rank; - - + // type for MPI communication MPI_Datatype mpi_data_type_T; MPI_Datatype mpi_data_type_W; @@ -146,95 +106,24 @@ template <class T, class W> class ALL_Voronoi_LB // collection of generator points std::vector<T> generator_points; - // size of system (for periodic boxes / systems) - std::vector<T> sys_size; - // number of ranks in global communicator int n_domains; - - int loadbalancing_step; }; template <class T, class W> ALL_Voronoi_LB<T,W>::~ALL_Voronoi_LB() { - if (vertices) delete vertices; - if (shifted_vertices) delete shifted_vertices; -} - -template <class T, class W> void ALL_Voronoi_LB<T,W>::get_vertices(T* result) -{ - for (int i = 0; i < dimension; ++i) - { - result[i] = vertices[i]; - } -} - -template <class T, class W> void ALL_Voronoi_LB<T,W>::get_shifted_vertices(T* result) -{ - for (int i = 0; i < dimension; ++i) - { - result[i] = shifted_vertices[i]; - } -} - -// getter for shifted vertices -template <class T, class W> void 
ALL_Voronoi_LB<T,W>::get_shifted_vertices(std::vector<ALL_Point<T>>& result) -{ - ALL_Point<T> p( - 3, - { shifted_vertices[0], - shifted_vertices[1], - shifted_vertices[2] } - ); - result.clear(); - result.push_back(p); -} - -template <class T, class W> void ALL_Voronoi_LB<T,W>::get_shifted_vertices(std::vector<T>& result) -{ - for (int i = 0; i < dimension; ++i) - { - result.at(i) = shifted_vertices[i]; - } -} - -// set the actual vertices (unsafe due to array copy) -template <class T, class W> void ALL_Voronoi_LB<T,W>::set_vertices(T* v) -{ - for (int i = 0; i < 2*dimension; ++i) - { - vertices[i] = v[i]; - } -} - -// set the actual vertices -template <class T, class W> void ALL_Voronoi_LB<T,W>::set_vertices(std::vector<ALL_Point<T>>& _v) -{ - for (int v = 0; v < 2; ++v) - for (int i = 0; i < dimension; ++i) - { - vertices[dimension * v + i] = _v.at(v).x(i); - } -} - -template <class T, class W> void ALL_Voronoi_LB<T,W>::set_sys_size(std::vector<T>& ss) -{ - for (auto i = 0; i < sys_size.size(); ++i) - { - sys_size.at(i) = ss.at(i); - } } // setup routine for the tensor-based load-balancing scheme // requires: -// global_comm (int): cartesian MPI communicator, from +// this->global_comm (int): cartesian MPI communicator, from // which separate sub communicators // are derived in order to represent // each plane of domains in the system -template <class T, class W> void ALL_Voronoi_LB<T,W>::setup(MPI_Comm comm) +template <class T, class W> void ALL_Voronoi_LB<T,W>::setup() { // store global communicator - global_comm = comm; + int dimension = this->get_dimension(); // no special communicator required @@ -244,8 +133,8 @@ template <class T, class W> void ALL_Voronoi_LB<T,W>::setup(MPI_Comm comm) // of the local cell -> large-scale Voronoi grid creation // is too expensive to be done // every step - MPI_Comm_size(global_comm, &n_domains); - MPI_Comm_rank(global_comm, &local_rank); + MPI_Comm_size(this->global_comm, &n_domains); + MPI_Comm_rank(this->global_comm, &this->local_rank); generator_points.resize(n_domains*(2*dimension+1)); // determine correct MPI data type for template T @@ -262,32 +151,37 @@ template <class T, class W> void ALL_Voronoi_LB<T,W>::setup(MPI_Comm comm) } -template<class T, class W> void ALL_Voronoi_LB<T,W>::balance() +template<class T, class W> void ALL_Voronoi_LB<T,W>::balance(int loadbalancing_step) { + int dimension = this->get_dimension(); // collect local information in array int info_length = 2*dimension+1; std::vector<T> local_info(info_length); + std::vector<ALL_Point<T>>& vertices = this->get_vertices(); + std::vector<ALL_Point<T>>& shifted_vertices = this->get_shifted_vertices(); + std::vector<W>& work = this->get_work(); - for (auto d = 0; d < 2*dimension; ++d) - { - local_info.at(d) = vertices[d]; - } - local_info.at(2*dimension) = (T)work; + for (auto i = 0; i < 2; ++i) + for (auto d = 0; d < dimension; ++d) + { + local_info.at(d) = vertices.at(i).x(d); + } + local_info.at(2*dimension) = (T)work.at(0); MPI_Allgather(local_info.data(), info_length, mpi_data_type_T, generator_points.data(), info_length, mpi_data_type_T, - global_comm); + this->global_comm); // create Voronoi-cells // TODO: check if system is periodic or not -> true / false! 
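
Aside on the gather a few lines above: each rank publishes its generator record, in the patch 2*dim+1 values (both domain corners plus the work), with a single MPI_Allgather, so every rank can rebuild the full Voronoi tessellation locally. A reduced, self-contained sketch of that exchange with a point-plus-work record (values are made up):

```cpp
#include <mpi.h>
#include <vector>
#include <iostream>

int main(int argc, char** argv)
{
    MPI_Init(&argc, &argv);

    int rank, n_domains;
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &n_domains);

    // per-rank record: generator point (x, y, z) followed by the local work
    const int info_length = 4;
    std::vector<double> local_info = {0.5 + rank, 0.5, 0.5, 100.0 * (rank + 1)};

    // after the allgather every rank holds the complete table and can build the
    // same Voronoi tessellation from it locally
    std::vector<double> generator_points(info_length * n_domains);
    MPI_Allgather(local_info.data(), info_length, MPI_DOUBLE,
                  generator_points.data(), info_length, MPI_DOUBLE,
                  MPI_COMM_WORLD);

    if (rank == 0)
        for (int p = 0; p < n_domains; ++p)
            std::cout << "rank " << p << ": x = " << generator_points[p * info_length]
                      << ", work = " << generator_points[p * info_length + 3] << std::endl;

    MPI_Finalize();
    return 0;
}
```

The same pattern is repeated further down, after the shift, to rebuild con_new from the updated generator points.
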
voro::container con_old( - sys_size.at(0), - sys_size.at(1), - sys_size.at(2), - sys_size.at(3), - sys_size.at(4), - sys_size.at(5), + this->sys_size.at(0), + this->sys_size.at(1), + this->sys_size.at(2), + this->sys_size.at(3), + this->sys_size.at(4), + this->sys_size.at(5), ALL_VORONOI_SUBBLOCKS, ALL_VORONOI_SUBBLOCKS, ALL_VORONOI_SUBBLOCKS, @@ -325,7 +219,7 @@ template<class T, class W> void ALL_Voronoi_LB<T,W>::balance() loadbalancing_step++; - if (local_rank == 0) + if (this->local_rank == 0) { con_old.draw_particles(ss_local_gp_gnu.str().c_str()); con_old.draw_cells_gnuplot(ss_local_vc_gnu.str().c_str()); @@ -362,7 +256,7 @@ template<class T, class W> void ALL_Voronoi_LB<T,W>::balance() cl_old.pos(pid,x,y,z,r); if (i == 0) { - if (pid == local_rank) + if (pid == this->local_rank) { con_old.compute_cell(c,cl_old); c.neighbors(next_neighbors.at(idx)); @@ -394,7 +288,7 @@ template<class T, class W> void ALL_Voronoi_LB<T,W>::balance() std::sort(neighbors.begin(),neighbors.end()); auto uniq_it = std::unique(neighbors.begin(), neighbors.end()); neighbors.resize( std::distance(neighbors.begin(), uniq_it) ); - neighbors.erase(std::remove(neighbors.begin(),neighbors.end(),local_rank),neighbors.end()); + neighbors.erase(std::remove(neighbors.begin(),neighbors.end(),this->local_rank),neighbors.end()); neighbors.erase(std::remove_if(neighbors.begin(),neighbors.end(),[](int x){return x < 0;}),neighbors.end()); } @@ -424,7 +318,7 @@ template<class T, class W> void ALL_Voronoi_LB<T,W>::balance() con_old.compute_cell(c,cl_old); volumes.at(idx) = c.volume(); } - if (pid == local_rank) + if (pid == this->local_rank) { con_old.compute_cell(c,cl_old); local_volume = c.volume(); @@ -432,16 +326,16 @@ template<class T, class W> void ALL_Voronoi_LB<T,W>::balance() } while(cl_old.inc()); } - - MPI_Barrier(global_comm); - if(local_rank == 0) std::cout << "still sane?" << std::endl; - +#ifdef ALL_DEBUG_ENABLED + MPI_Barrier(this->global_comm); + if(this->local_rank == 0) std::cout << "ALL_Voronoi<T,W>::balance begin checking..." << std::endl; +#endif /* for (int i = 0; i < 25; ++i) { - if (local_rank == i) + if (this->local_rank == i) { - std::cout << local_rank << ": "; + std::cout << this->local_rank << ": "; for (auto n : neighbors) std::cout << " " << n << " "; std::cout << "| " << neighbors.size() << std::endl; @@ -449,7 +343,7 @@ template<class T, class W> void ALL_Voronoi_LB<T,W>::balance() std::cout << " " << s << " "; std::cout << std::endl; } - MPI_Barrier(global_comm); + MPI_Barrier(this->global_comm); } */ @@ -465,6 +359,11 @@ template<class T, class W> void ALL_Voronoi_LB<T,W>::balance() work_load += generator_points.at(info_length * neighbor + info_length - 1); } +#ifdef ALL_DEBUG_ENABLED + MPI_Barrier(this->global_comm); + if(this->local_rank == 0) std::cout << "ALL_Voronoi<T,W>::balance find neighbors..." 
<< std::endl; +#endif + T max_diff = 20.0; for (auto it = neighbors.begin(); it != neighbors.end(); ++it) @@ -477,14 +376,14 @@ template<class T, class W> void ALL_Voronoi_LB<T,W>::balance() diff.at(d) = generator_points.at(info_length * neighbor + dimension + d) - local_info.at(d); //diff.at(d) = generator_points.at(info_length * neighbor + d) - local_info.at(d); - if (diff.at(d) > 0.5 * (sys_size[2*d+1] - sys_size[2*d])) + if (diff.at(d) > 0.5 * (this->sys_size[2*d+1] - this->sys_size[2*d])) { - diff.at(d) -= (sys_size[2*d+1] - sys_size[2*d]); + diff.at(d) -= (this->sys_size[2*d+1] - this->sys_size[2*d]); correct = false; } - else if (diff.at(d) < -0.5 * (sys_size[2*d+1] - sys_size[2*d])) + else if (diff.at(d) < -0.5 * (this->sys_size[2*d+1] - this->sys_size[2*d])) { - diff.at(d) += (sys_size[2*d+1] - sys_size[2*d]); + diff.at(d) += (this->sys_size[2*d+1] - this->sys_size[2*d]); correct = false; } @@ -517,19 +416,24 @@ template<class T, class W> void ALL_Voronoi_LB<T,W>::balance() for (int d = 0; d < dimension; ++d) { //shift.at(d) += 0.5 * work_diff / work_density * surfaces.at(std::distance(neighbors.begin(),it)) * diff.at(d); - shift.at(d) += 0.5 * work_diff * diff.at(d) / gamma; + shift.at(d) += 0.5 * work_diff * diff.at(d) / this->gamma; } } } +#ifdef ALL_DEBUG_ENABLED + MPI_Barrier(this->global_comm); + if(this->local_rank == 0) std::cout << "ALL_Voronoi<T,W>::balance after neighbors found..." << std::endl; +#endif + norm = sqrt ( shift.at(0) * shift.at(0) + shift.at(1) * shift.at(1) + shift.at(2) * shift.at(2) ); /* for (int i = 0; i < 25; ++i) { - if (local_rank == i) + if (this->local_rank == i) { - std::cout << local_rank << ": "; + std::cout << this->local_rank << ": "; for (auto n : local_info) std::cout << " " << n << " "; std::cout << " | "; @@ -537,15 +441,20 @@ template<class T, class W> void ALL_Voronoi_LB<T,W>::balance() std::cout << " " << n << " "; std::cout << std::endl; } - MPI_Barrier(global_comm); + MPI_Barrier(this->global_comm); } */ T scale = 1.0; - if (norm > 0.45 * max_diff) + if (norm > 0.45 * max_diff) scale = 0.45 * max_diff / norm; +#ifdef ALL_DEBUG_ENABLED + MPI_Barrier(this->global_comm); + if(this->local_rank == 0) std::cout << "ALL_Voronoi<T,W>::balance find new neighbors..." << std::endl; +#endif + // to find new neighbors for (int d = 0; d < dimension; ++d) @@ -561,26 +470,31 @@ template<class T, class W> void ALL_Voronoi_LB<T,W>::balance() :local_info.at(d)); */ // periodic correction of points - local_info.at(d) = (local_info.at(d) < sys_size.at(2*d)) - ?sys_size.at(2*d)+1.0 - : ((local_info.at(d) >= sys_size.at(2*d+1)) - ?sys_size.at(2*d+1)-1.0 + local_info.at(d) = (local_info.at(d) < this->sys_size.at(2*d)) + ?this->sys_size.at(2*d)+1.0 + : ((local_info.at(d) >= this->sys_size.at(2*d+1)) + ?this->sys_size.at(2*d+1)-1.0 :local_info.at(d)); } MPI_Allgather(local_info.data(), info_length, mpi_data_type_T, generator_points.data(), info_length, mpi_data_type_T, - global_comm); + this->global_comm); + +#ifdef ALL_DEBUG_ENABLED + MPI_Barrier(this->global_comm); + if(this->local_rank == 0) std::cout << "ALL_Voronoi<T,W>::balance create new containers..." << std::endl; +#endif // create Voronoi-cells // TODO: check if system is periodic or not -> true / false! 
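
Aside on the periodic correction above: separations between generator points are wrapped by the minimum-image rule before they enter the shift, and the accumulated shift is afterwards clamped to 0.45 * max_diff. A stand-alone sketch of the wrapping step (the box layout follows sys_size; the numbers are illustrative):

```cpp
#include <array>
#include <cmath>
#include <iostream>

// Minimum-image correction applied to the separation between a neighbor's
// generator point and the local one; the box is {xlo, xhi, ylo, yhi, zlo, zhi},
// matching the sys_size layout used above.
std::array<double, 3> minimum_image(std::array<double, 3> diff,
                                    const std::array<double, 6>& box)
{
    for (int d = 0; d < 3; ++d) {
        const double length = box[2 * d + 1] - box[2 * d];
        if (diff[d] >  0.5 * length) diff[d] -= length;
        if (diff[d] < -0.5 * length) diff[d] += length;
    }
    return diff;
}

int main()
{
    const std::array<double, 6> box = {0.0, 10.0, 0.0, 10.0, 0.0, 10.0};
    // neighbor at x = 9.5, local point at x = 0.5: the periodic separation is -1, not +9
    auto d = minimum_image({9.0, 0.0, 0.0}, box);
    std::cout << d[0] << std::endl;   // prints -1
    return 0;
}
```
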
voro::container con_new( - sys_size.at(0), - sys_size.at(1), - sys_size.at(2), - sys_size.at(3), - sys_size.at(4), - sys_size.at(5), + this->sys_size.at(0), + this->sys_size.at(1), + this->sys_size.at(2), + this->sys_size.at(3), + this->sys_size.at(4), + this->sys_size.at(5), ALL_VORONOI_SUBBLOCKS, ALL_VORONOI_SUBBLOCKS, ALL_VORONOI_SUBBLOCKS, @@ -618,7 +532,7 @@ template<class T, class W> void ALL_Voronoi_LB<T,W>::balance() loadbalancing_step++; - if (local_rank == 0) + if (this->local_rank == 0) { con_new.draw_particles(ss_local_gp_gnu2.str().c_str()); con_new.draw_cells_gnuplot(ss_local_vc_gnu2.str().c_str()); @@ -627,6 +541,11 @@ template<class T, class W> void ALL_Voronoi_LB<T,W>::balance() } */ +#ifdef ALL_DEBUG_ENABLED + MPI_Barrier(this->global_comm); + if(this->local_rank == 0) std::cout << "ALL_Voronoi<T,W>::balance storing shifted vertices..." << std::endl; +#endif + for (int d = 0; d < dimension; ++d) shifted_vertices[d] = local_info.at(d); @@ -651,7 +570,7 @@ template<class T, class W> void ALL_Voronoi_LB<T,W>::balance() cl_new.pos(pid,x,y,z,r); if (i == 0) { - if (pid == local_rank) + if (pid == this->local_rank) { con_new.compute_cell(c,cl_new); c.neighbors(next_neighbors.at(idx)); @@ -680,7 +599,7 @@ template<class T, class W> void ALL_Voronoi_LB<T,W>::balance() std::sort(neighbors.begin(),neighbors.end()); auto uniq_it = std::unique(neighbors.begin(), neighbors.end()); neighbors.resize( std::distance(neighbors.begin(), uniq_it) ); - neighbors.erase(std::remove(neighbors.begin(),neighbors.end(),local_rank),neighbors.end()); + neighbors.erase(std::remove(neighbors.begin(),neighbors.end(),this->local_rank),neighbors.end()); neighbors.erase(std::remove_if(neighbors.begin(),neighbors.end(),[](int x){return x < 0;}),neighbors.end()); } @@ -722,9 +641,9 @@ template<class T, class W> void ALL_Voronoi_LB<T,W>::balance() /* for (int i = 0; i < 25; ++i) { - if (local_rank == i) + if (this->local_rank == i) { - std::cout << local_rank << ": "; + std::cout << this->local_rank << ": "; for (auto n : neighbors) std::cout << " " << n << " "; std::cout << "| " << neighbors.size() << std::endl; @@ -735,7 +654,7 @@ template<class T, class W> void ALL_Voronoi_LB<T,W>::balance() } std::cout << std::endl; } - MPI_Barrier(global_comm); + MPI_Barrier(this->global_comm); } */ diff --git a/src/ALL_fortran.cpp b/src/ALL_fortran.cpp index 7ab556dc02b5526768bdb94f4187558e05c70c45..80494ad151333e6e64166080ad9cca41d4b444d1 100644 --- a/src/ALL_fortran.cpp +++ b/src/ALL_fortran.cpp @@ -34,9 +34,9 @@ extern "C" { // wrapper to create a ALL<double,double> object (should be the // most commonly used one) - ALL<double,double>* ALL_init_f(const int dim, double gamma) + ALL<double,double>* ALL_init_f(ALL_LB_t method, const int dim, double gamma) { - return new ALL<double,double>(dim,gamma); + return new ALL<double,double>(method,dim,gamma); } // set grid parameters @@ -74,15 +74,15 @@ extern "C" } // wrapper to setup routine - void ALL_setup_f(ALL<double,double>* all_obj, ALL_LB_t method) + void ALL_setup_f(ALL<double,double>* all_obj) { - all_obj->setup(method); + all_obj->setup(); } // wrapper to call balance routine - void ALL_balance_f(ALL<double,double>* all_obj, ALL_LB_t method) + void ALL_balance_f(ALL<double,double>* all_obj) { - all_obj->balance(method); + all_obj->balance(); } // wrapper to get number of new vertices diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index f223173decfc0aafeb13549d37660352ac0374f4..e94f5e6df07d104718b856ff60f05d2a1361d0e5 100644 --- a/src/CMakeLists.txt +++ 
b/src/CMakeLists.txt
@@ -1,6 +1,7 @@
 set(ALL_INCLUDE_DIR ${CMAKE_SOURCE_DIR}/include)
-set(ALL_HEADER_FILES ${ALL_INCLUDE_DIR}/ALL.hpp
-                     ${ALL_INCLUDE_DIR}/ALL_Staggered.hpp
+set(ALL_HEADER_FILES ${ALL_INCLUDE_DIR}/ALL.hpp
+                     ${ALL_INCLUDE_DIR}/ALL_LB.hpp
+                     ${ALL_INCLUDE_DIR}/ALL_Staggered.hpp
                      ${ALL_INCLUDE_DIR}/ALL_Tensor.hpp
                      ${ALL_INCLUDE_DIR}/ALL_Unstructured.hpp
                      ${ALL_INCLUDE_DIR}/ALL_Voronoi.hpp
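
Closing remark on the wrapper and build changes above: the balancing method is now passed once, at construction, and setup() and balance() are called without a method argument. A toy sketch of that design, deliberately independent of the ALL headers (every name in it is invented for illustration):

```cpp
#include <iostream>
#include <memory>
#include <vector>

// Toy model of the new object layout: the method is fixed when the balancer is
// created, shared state lives in a base class, and setup()/balance() take no
// method argument.
enum class Method { TENSOR, STAGGERED };

class BalancerBase {
public:
    BalancerBase(int dim, double gamma) : dim_(dim), gamma_(gamma) {}
    virtual ~BalancerBase() = default;
    void set_work(const std::vector<double>& w) { work_ = w; }  // shared setter
    virtual void setup() = 0;
    virtual void balance(int step) = 0;
protected:
    int dim_;
    double gamma_;
    std::vector<double> work_;
};

class TensorBalancer : public BalancerBase {
public:
    using BalancerBase::BalancerBase;
    void setup() override { std::cout << "tensor setup, dim " << dim_ << "\n"; }
    void balance(int step) override { std::cout << "tensor balance, step " << step << "\n"; }
};

std::unique_ptr<BalancerBase> make_balancer(Method m, int dim, double gamma)
{
    if (m == Method::TENSOR)
        return std::make_unique<TensorBalancer>(dim, gamma);
    return nullptr;  // other schemes left out of this sketch
}

int main()
{
    auto lb = make_balancer(Method::TENSOR, 3, 4.0);  // method chosen once, here
    lb->set_work({1.0});
    lb->setup();      // no method argument
    lb->balance(0);   // likewise
    return 0;
}
```

The Fortran-facing entry points ALL_init_f, ALL_setup_f and ALL_balance_f in the hunk above follow exactly this sequence.
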