diff --git a/CMakeLists.txt b/CMakeLists.txt index de77a504aa99fd8974a82f6c970f731924a3d181..beee3f94711eb56d26cedefd1620f95c064861c7 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -76,10 +76,13 @@ if(CM_ALL_VTK_OUTPUT) if(NOT VTK_FOUND) message(FATAL_ERROR "VTK not found, help CMake to find it by setting VTK_LIBRARY and VTK_INCLUDE_DIR") else() + message("VTK found in ${VTK_DIR}") if (VTK_MAJOR_VERSION GREATER_EQUAL 9) find_package(VTK REQUIRED COMPONENTS CommonCore CommonDataModel + FiltersHybrid + FiltersModeling FiltersProgrammable IOParallelXML IOXML @@ -89,6 +92,8 @@ if(CM_ALL_VTK_OUTPUT) find_package(VTK REQUIRED COMPONENTS vtkCommonCore vtkCommonDataModel + vtkFiltersHybrid + vtkFiltersModeling vtkFiltersProgrammable vtkIOParallelXML vtkIOXML diff --git a/example/ALL_test.cpp b/example/ALL_test.cpp index 5827b7df32e17cd763300179e6f0ea636870d77f..d21fc5161b1ded7d676265043a6d6e24c65547ee 100644 --- a/example/ALL_test.cpp +++ b/example/ALL_test.cpp @@ -53,6 +53,12 @@ #include <vtkVersion.h> #include <vtkXMLPPolyDataWriter.h> #include <vtkXMLPolyDataWriter.h> + +// for FORCEBASED test +#include <vtkDelaunay3D.h> +#include <vtkNew.h> +#include <vtkDataSetSurfaceFilter.h> +#include <vtkSelectEnclosedPoints.h> #endif #define BOX_SIZE 300.0 @@ -60,7 +66,7 @@ #define N_GENERATE 1000 #define SEED 123789456u #define N_LOOP 500 -#define OUTPUT_INTV 1 +#define OUTPUT_INTV 10 #define MAX_NEIG 1024 #define ALL_HISTOGRAM_DEFAULT_WIDTH 1.0 @@ -122,8 +128,8 @@ void generate_points(std::vector<ALL::Point<double>> &points, * long + 4*n_point double : point data ****************************************************************/ -void read_points(std::vector<ALL::Point<double>> &points, std::vector<double> l, - std::vector<double> u, int &n_points, char *filename, +void read_points(std::vector<ALL::Point<double>> &points, + int &n_points, char *filename, int dimension, int rank, MPI_Comm comm) { MPI_File file; MPI_Barrier(comm); @@ -133,9 +139,9 @@ void read_points(std::vector<ALL::Point<double>> &points, std::vector<double> l, n_points = 0; - int n_ranks; + int nRanks; long offset; - MPI_Comm_size(comm, &n_ranks); + MPI_Comm_size(comm, &nRanks); if (err) { if (rank == 0) @@ -148,7 +154,7 @@ void read_points(std::vector<ALL::Point<double>> &points, std::vector<double> l, MPI_LONG, MPI_STATUS_IGNORE); // read number of points from file MPI_File_read_at(file, - (MPI_Offset)(n_ranks * sizeof(long) + rank * sizeof(int)), + (MPI_Offset)(nRanks * sizeof(long) + rank * sizeof(int)), &n_points, 1, MPI_INT, MPI_STATUS_IGNORE); double values[4]; @@ -157,11 +163,11 @@ void read_points(std::vector<ALL::Point<double>> &points, std::vector<double> l, for (int i = 0; i < n_points; ++i) { MPI_File_read_at(file, (MPI_Offset)((offset + i) * block_size + - n_ranks * (sizeof(int) + sizeof(long))), + nRanks * (sizeof(int) + sizeof(long))), &ID, 1, MPI_DOUBLE, MPI_STATUS_IGNORE); MPI_File_read_at(file, (MPI_Offset)((offset + i) * block_size + - n_ranks * (sizeof(int) + sizeof(long)) + + nRanks * (sizeof(int) + sizeof(long)) + sizeof(long)), values, 4, MPI_DOUBLE, MPI_STATUS_IGNORE); ALL::Point<double> p(dimension, &values[0], values[3], ID); @@ -210,10 +216,10 @@ void read_points(std::vector<ALL::Point<double>> &points, std::vector<double> l, // function to create a VTK output of the points in the system void print_points(std::vector<ALL::Point<double>> plist, int step, ALL::LB_t method, MPI_Comm comm) { - int rank, n_ranks; + int rank, nRanks; static bool vtk_init = false; MPI_Comm_rank(comm, &rank); - MPI_Comm_size(comm, &n_ranks); + MPI_Comm_size(comm, &nRanks); vtkMPIController *controller; // seperate init step required, since VORONOI does not @@ -272,7 +278,7 @@ void print_points(std::vector<ALL::Point<double>> plist, int step, auto parallel_writer = vtkSmartPointer<vtkXMLPPolyDataWriter>::New(); parallel_writer->SetFileName(ss_para.str().c_str()); - parallel_writer->SetNumberOfPieces(n_ranks); + parallel_writer->SetNumberOfPieces(nRanks); parallel_writer->SetStartPiece(rank); parallel_writer->SetEndPiece(rank); parallel_writer->SetInputData(polydata); @@ -376,7 +382,7 @@ int main(int argc, char **argv) { // setup of cartesian communicator int localRank; - int n_ranks; + int nRanks; MPI_Comm cart_comm; int local_coords[sys_dim]; int periodicity[sys_dim]; @@ -394,11 +400,11 @@ int main(int argc, char **argv) { } // get number of total ranks - MPI_Comm_size(MPI_COMM_WORLD, &n_ranks); + MPI_Comm_size(MPI_COMM_WORLD, &nRanks); if (global_dim[0] == 0) { // get distribution into number of dimensions - MPI_Dims_create(n_ranks, sys_dim, global_dim); + MPI_Dims_create(nRanks, sys_dim, global_dim); } // create cartesian MPI communicator @@ -539,7 +545,7 @@ int main(int argc, char **argv) { generate_points(points, l, u, coords, global_dim, sys_dim, n_points, localRank); } else { - read_points(points, l, u, n_points, filename, sys_dim, localRank, + read_points(points, n_points, filename, sys_dim, localRank, cart_comm); } double *transfer; @@ -581,7 +587,7 @@ int main(int argc, char **argv) { } double n_total; MPI_Allreduce(&n_local, &n_total, 1, MPI_DOUBLE, MPI_SUM, cart_comm); - double avg_work = (double)n_total / (double)n_ranks; + double avg_work = (double)n_total / (double)nRanks; double n_min, n_max; MPI_Allreduce(&n_local, &n_min, 1, MPI_DOUBLE, MPI_MIN, cart_comm); MPI_Allreduce(&n_local, &n_max, 1, MPI_DOUBLE, MPI_MAX, cart_comm); @@ -595,9 +601,10 @@ int main(int argc, char **argv) { // get starting number of particles MPI_Allreduce(&n_local, &total_points, 1, MPI_DOUBLE, MPI_SUM, cart_comm); + // output of borders / contents - if (n_ranks < 216) { - for (int i = 0; i < n_ranks; ++i) { + if (nRanks < 216) { + for (int i = 0; i < nRanks; ++i) { if (localRank == i) { std::ofstream of; of.open("domain_data.dat", std::ios::out | std::ios::app); @@ -610,7 +617,7 @@ int main(int argc, char **argv) { of << " " << n_local << " "; of << std::endl; - if (i == n_ranks - 1) + if (i == nRanks - 1) of << std::endl; of.close(); MPI_Barrier(cart_comm); @@ -623,7 +630,7 @@ int main(int argc, char **argv) { if (chosen_method == ALL::LB_t::VORONOI) { // one-time particle output to voronoi/particles.pov - for (int i = 0; i < n_ranks; ++i) { + for (int i = 0; i < nRanks; ++i) { if (localRank == i) { std::ofstream of; if (i != 0) @@ -641,7 +648,6 @@ int main(int argc, char **argv) { MPI_Barrier(cart_comm); } - int minpoints = 0; double cow_sys[sys_dim + 1]; double target_point[sys_dim + 1]; @@ -665,11 +671,11 @@ int main(int argc, char **argv) { // experimental: as a first step try to find more optimal start points for (int i_preloop = 0; i_preloop < ALL_VORONOI_PREP_STEPS; ++i_preloop) { // get neighbor information - int nNeighbors = n_ranks; - std::vector<double> neighbor_vertices(sys_dim * n_ranks); - std::vector<int> neighbors(n_ranks); + int nNeighbors = nRanks; + std::vector<double> neighbor_vertices(sys_dim * nRanks); + std::vector<int> neighbors(nRanks); - for (int n = 0; n < n_ranks; ++n) + for (int n = 0; n < nRanks; ++n) neighbors.at(n) = n; double local_vertex[sys_dim]; @@ -852,7 +858,7 @@ int main(int argc, char **argv) { */ // output of borders / contents - for (int i = 0; i < n_ranks; ++i) { + for (int i = 0; i < nRanks; ++i) { if (localRank == i) { std::ofstream of; if (!weighted_points) @@ -864,7 +870,7 @@ int main(int argc, char **argv) { of << " " << vertices.at(0)[0] << " " << vertices.at(0)[1] << " " << vertices.at(0)[2] << " " << n_points << std::endl; - if (i == n_ranks - 1) + if (i == nRanks - 1) of << std::endl; of.close(); MPI_Barrier(cart_comm); @@ -888,7 +894,7 @@ int main(int argc, char **argv) { gamma *= 2.0; limit_efficiency /= 2.0; } - if (localRank == 0) + if (localRank == 0 && (i_loop % OUTPUT_INTV == 0)) std::cout << "loop " << i_loop << ": " << std::endl; std::vector<double> work; std::vector<int> n_bins(3, -1); @@ -1013,6 +1019,32 @@ int main(int argc, char **argv) { } #endif } + + // computing center of points for ForceBased method + if (chosen_method == ALL::LB_t::FORCEBASED) + { + ALL::Point<double> cow(3); + for (int i = 0; i < 3; ++i) + cow[i] = 0.0; + if (points.size() > 0) + { + for (auto p : points) + { + cow = cow + p; + } + cow = cow * (1.0 / (double)points.size()); + } + else + { + for (auto v : vertices) + { + cow = cow + v; + } + cow = cow * (1.0 / (double)vertices.size()); + } + lb_obj.setMethodData(&cow); + } + #ifdef ALL_DEBUG_ENABLED MPI_Barrier(cart_comm); if (localRank == 0) @@ -1069,9 +1101,11 @@ int main(int argc, char **argv) { int n_points_global = 0; MPI_Reduce(&n_points, &n_points_global, 1, MPI_INT, MPI_SUM, 0, MPI_COMM_WORLD); +#ifdef ALL_DEBUG_ENABLED if (localRank == 0) std::cout << "number of particles in step " << i_loop << ": " << n_points_global << std::endl; +#endif #ifdef ALL_DEBUG_ENABLED MPI_Barrier(cart_comm); if (localRank == 0) @@ -1154,7 +1188,6 @@ int main(int argc, char **argv) { P--; } } - MPI_Status status; MPI_Request sreq_r, rreq_r; MPI_Request sreq_l, rreq_l; @@ -1519,224 +1552,59 @@ int main(int argc, char **argv) { } #ifdef ALL_VTK_OUTPUT -#ifdef ALL_VTK_FORCE_SORT - // creating an unstructured grid for local domain and neighbors - auto vtkpoints = vtkSmartPointer<vtkPoints>::New(); - auto unstructuredGrid = vtkSmartPointer<vtkForceBasedGrid>::New(); - for (int i = 0; i < 27; ++i) { - for (int v = 0; v < 8; ++v) { - vtkpoints->InsertNextPoint(comm_vertices[i * 24 + v * 3], - comm_vertices[i * 24 + v * 3 + 1], - comm_vertices[i * 24 + v * 3 + 2]); - } - } - unstructuredGrid->SetPoints(vtkpoints); - auto work = vtkSmartPointer<vtkFloatArray>::New(); - work->SetNumberOfComponents(1); - work->SetNumberOfTuples(27); - work->SetName("Cell"); + // VTK data sets describing the convex hulls of each domain + vtkSmartPointer<vtkSelectEnclosedPoints> dataSets[27]; - for (int n = 0; n < 27; ++n) { - // define grid points, i.e. vertices of local domain - vtkIdType pointIds[8] = {8 * n + 0, 8 * n + 1, 8 * n + 2, 8 * n + 3, - 8 * n + 4, 8 * n + 5, 8 * n + 6, 8 * n + 7}; - - auto faces = vtkSmartPointer<vtkCellArray>::New(); - // setup faces of polyhedron - vtkIdType f0[3] = {8 * n + 0, 8 * n + 2, 8 * n + 1}; - vtkIdType f1[3] = {8 * n + 1, 8 * n + 2, 8 * n + 3}; - - vtkIdType f2[3] = {8 * n + 0, 8 * n + 4, 8 * n + 2}; - vtkIdType f3[3] = {8 * n + 2, 8 * n + 4, 8 * n + 6}; - - vtkIdType f4[3] = {8 * n + 2, 8 * n + 6, 8 * n + 3}; - vtkIdType f5[3] = {8 * n + 3, 8 * n + 6, 8 * n + 7}; - - vtkIdType f6[3] = {8 * n + 1, 8 * n + 5, 8 * n + 3}; - vtkIdType f7[3] = {8 * n + 3, 8 * n + 5, 8 * n + 7}; - - vtkIdType f8[3] = {8 * n + 0, 8 * n + 4, 8 * n + 1}; - vtkIdType f9[3] = {8 * n + 1, 8 * n + 4, 8 * n + 5}; - - vtkIdType fa[3] = {8 * n + 4, 8 * n + 6, 8 * n + 5}; - vtkIdType fb[3] = {8 * n + 5, 8 * n + 6, 8 * n + 7}; - - faces->InsertNextCell(3, f0); - faces->InsertNextCell(3, f1); - faces->InsertNextCell(3, f2); - faces->InsertNextCell(3, f3); - faces->InsertNextCell(3, f4); - faces->InsertNextCell(3, f5); - faces->InsertNextCell(3, f6); - faces->InsertNextCell(3, f7); - faces->InsertNextCell(3, f8); - faces->InsertNextCell(3, f9); - faces->InsertNextCell(3, fa); - faces->InsertNextCell(3, fb); - - unstructuredGrid->InsertNextCell(VTK_POLYHEDRON, 8, pointIds, 12, - faces->GetPointer()); - work->SetValue(n, (double)n); - } - unstructuredGrid->GetCellData()->AddArray(work); + // vtkPoints list containing all points + vtkSmartPointer<vtkPoints> allPoints = vtkSmartPointer<vtkPoints>::New(); - /* Debug output: print local cell and neighbors */ - /* - if (localRank == 26) - { - auto writer = vtkSmartPointer<vtkXMLForceBasedGridWriter>::New(); - writer->SetInputData(unstructuredGrid); - writer->SetFileName("test.vtu"); - writer->SetDataModeToAscii(); - //writer->SetDataModeToBinary(); - writer->Write(); + // fill list + for (auto P = points.begin(); P != points.end(); ++P) + { + ALL::Point<double> p = *P; + double pcoords[3]; + for (int d = 0; d < 3; ++d) + pcoords[d] = p[d]; + allPoints->InsertNextPoint(pcoords); } - */ - MPI_Allreduce(&n_points, &check_np, 1, MPI_INT, MPI_SUM, cart_comm); + vtkSmartPointer<vtkPolyData> allData = vtkSmartPointer<vtkPolyData>::New(); + allData->SetPoints(allPoints); - auto locator = vtkSmartPointer<vtkCellLocator>::New(); - locator->SetDataSet(unstructuredGrid); - locator->BuildLocator(); -#else - auto in_triangle = [=](double nv[27 * 8 * 3], int n, int A, int B, - int C, double x, double y, double z) { - ALL::Point<double> a(3, nv + n * 24 + A * 8); - ALL::Point<double> b(3, nv + n * 24 + B * 8); - ALL::Point<double> c(3, nv + n * 24 + C * 8 + 0); - ALL::Point<double> p(3); - p[0] = x; - p[1] = y; - p[2] = z; - return false; - // return p.same_side_line(a, b, c) && - // p.same_side_line(b, c, a) && - // p.same_side_line(c, a, b); - }; - - // check if (x,y,z) is on the same side as A in - // regard to the plane spanned by (B,C,D) - auto same_side_plane = [=](double nv[27 * 8 * 3], int n, int A, int B, - int C, int D, double x, double y, double z) { - // compute difference vectors required: - // C-B - double CBx = nv[n * 24 + C * 8 + 0] - nv[n * 24 + B * 8 + 0]; - double CBy = nv[n * 24 + C * 8 + 1] - nv[n * 24 + B * 8 + 1]; - double CBz = nv[n * 24 + C * 8 + 2] - nv[n * 24 + B * 8 + 2]; - // D-B - double DBx = nv[n * 24 + D * 8 + 0] - nv[n * 24 + B * 8 + 0]; - double DBy = nv[n * 24 + D * 8 + 1] - nv[n * 24 + B * 8 + 1]; - double DBz = nv[n * 24 + D * 8 + 2] - nv[n * 24 + B * 8 + 2]; - // A-B - double ABx = nv[n * 24 + A * 8 + 0] - nv[n * 24 + B * 8 + 0]; - double ABy = nv[n * 24 + A * 8 + 1] - nv[n * 24 + B * 8 + 1]; - double ABz = nv[n * 24 + A * 8 + 2] - nv[n * 24 + B * 8 + 2]; - // P-B - double PBx = x - nv[n * 24 + B * 8 + 0]; - double PBy = y - nv[n * 24 + B * 8 + 1]; - double PBz = z - nv[n * 24 + B * 8 + 2]; - - // compute normal vector of plane - // n = (C-B) x (D-B) - double nx = CBy * DBz - CBz * DBy; - double ny = CBz * DBx - CBx * DBz; - double nz = CBx * DBy - CBy * DBx; - - // compute dot product <A-B,n> - double ABn = ABx * nx + ABy * ny + ABz * nz; - - // compute dot product <P-B,n> - double PBn = PBx * nx + PBy * ny + PBz * nz; - - /* - if ( - x <= TRACK_X+0.01 && x >= TRACK_X && - y <= TRACK_Y+0.01 && y >= TRACK_Y && - z <= TRACK_Z+0.01 && z >= TRACK_Z - ) + // create each data set + for (int i = 0; i < 27; ++i) + { + // create points structure + vtkSmartPointer<vtkPoints> domVertices = vtkSmartPointer<vtkPoints>::New(); + // fill point structure with data + for (int j = 0; j < 8; ++j) { - std::cerr << "Tracked Particle: " - << localRank << " ( " - << x << " , " - << y << " , " - << z << " ) " - << n << ", " - << A << ", " - << B << ", " - << C << ", " - << D << ") " - << ( ( std::abs(PBn) < 1e-12 ) && in_triangle(nv, n, B, - C, D, x, y, z) ) << " " - << ( ( ABn * PBn ) > 0 ) - << std::endl; - } - */ - - if (std::abs(PBn) < 1e-12) { - return in_triangle(nv, n, B, C, D, x, y, z); - } else - // check if sign matches - return ((ABn * PBn) > 0); - }; - - // check if (x,y,z) is in the tetrahedron described - // by the points within the array t - auto in_tetraeder = [=](double nv[27 * 8 * 3], int n, int t[4], - double x, double y, double z) { - return same_side_plane(nv, n, t[0], t[1], t[2], t[3], x, y, z) && - same_side_plane(nv, n, t[3], t[0], t[1], t[2], x, y, z) && - same_side_plane(nv, n, t[2], t[3], t[0], t[1], x, y, z) && - same_side_plane(nv, n, t[1], t[2], t[3], t[0], x, y, z); - }; - - auto containing_neighbor = [=](double nv[27 * 8 * 3], bool e[27], - double x, double y, double z) { - // definition of tetrahedra - /* - int tetrahedra[8][4] = - { - { 0, 1, 2, 4 }, - { 1, 0, 3, 5 }, - { 2, 3, 0, 6 }, - { 3, 2, 1, 7 }, - { 4, 5, 6, 0 }, - { 5, 4, 7, 1 }, - { 6, 7, 4, 2 }, - { 7, 6, 5, 3 } - }; - */ - int tetrahedra[5][4] = {{1, 2, 4, 7}, - {0, 1, 2, 4}, - {1, 2, 3, 7}, - {1, 4, 5, 7}, - {2, 4, 6, 7}}; - - // check if the particle is in the local - // tetrahedron, if so keep it - // (needed to decide what about particles - // on surfaces) - for (int t = 0; t < 5; ++t) { - if (in_tetraeder(nv, 13, tetrahedra[t], x, y, z)) - return 13; - } - - // loop over neighbors - for (int n = 0; n < 27; ++n) { - if (!e[n] || n == 13) - continue; - // loop over tetrahedra - for (int t = 0; t < 5; ++t) { - if (in_tetraeder(nv, n, tetrahedra[t], x, y, z)) - return n; - } + domVertices->InsertNextPoint(comm_vertices + 24 * i + 3 * j); } + vtkSmartPointer<vtkPolyData> domSet = vtkSmartPointer<vtkPolyData>::New(); + domSet->SetPoints(domVertices); + // create a Delaunay grid of the points + vtkNew<vtkDelaunay3D> delaunay; + delaunay->SetInputData(domSet); + delaunay->Update(); + // extract the surface + vtkSmartPointer<vtkDataSetSurfaceFilter> surfaceFilter = vtkSmartPointer<vtkDataSetSurfaceFilter>::New(); + surfaceFilter->SetInputConnection(delaunay->GetOutputPort()); + vtkDataSetSurfaceFilter::SetGlobalWarningDisplay(0); + surfaceFilter->Update(); + // create vtkPolyData + vtkSmartPointer<vtkPolyData> surfaceData = vtkSmartPointer<vtkPolyData>::New(); + surfaceData = surfaceFilter->GetOutput(); + + dataSets[i] = vtkSmartPointer<vtkSelectEnclosedPoints>::New(); + dataSets[i]->SetInputData(allData); + dataSets[i]->SetSurfaceData(surfaceData); + dataSets[i]->Update(); + } - return -1; - }; - -#endif + // setup counters for send / recv actions int n_send[27]; int n_recv[27]; for (int i = 0; i < 27; ++i) { @@ -1744,51 +1612,78 @@ int main(int argc, char **argv) { n_recv[i] = 0; } - int check_np = 0; - int check_np_new = 0; - - for (auto P = points.begin(); P != points.end(); ++P) { - ALL::Point<double> p = *P; - double pcoords[3]; - double pccoords[3]; - int subId; - double weights[4]; - for (int d = 0; d < 3; ++d) - pcoords[d] = p[d]; -#ifdef ALL_VTK_FORCE_SORT - vtkIdType cellId = locator->FindCell(pcoords); -#else - int cellId = containing_neighbor(comm_vertices, exists, pcoords[0], - pcoords[1], pcoords[2]); -#endif - /* - vtkIdType cellId = - unstructuredGrid->FindCell(pcoords, NULL, 0, 1e-6, subId, pccoords, - weights); - */ + std::vector<int> sendTarget(allPoints->GetNumberOfPoints(), -1); + + // check if point stays in local domain + for(int j = 0; j < allPoints->GetNumberOfPoints(); ++j) + { + if (dataSets[13]->IsInside(j)) + { + n_send[13]++; + sendTarget.at(j) = 13; + } + } - // if the particle is in a valid neighboring cell - if (cellId >= 0 && cellId != 13 && cellId <= 26) { - if (n_send[cellId] == max_particles) { - throw ALL::InvalidArgumentException( - __FILE__, __func__, __LINE__, - "Trying to send more particles than \ - buffer size allows!"); + // iterate over neighbors to fill send buffers + for(int i = 0; i < 27; ++i) + { + if (i == 13) continue; + for(int j = 0; j < allPoints->GetNumberOfPoints(); ++j) + { + if (sendTarget.at(j) != -1) continue; + if (dataSets[i]->IsInside(j) ) + { + sendTarget.at(j) = i; + if (n_send[i] == max_particles) { + throw ALL::InvalidArgumentException( + __FILE__, __func__, __LINE__, + "Trying to send more particles than \ + buffer size allows!"); + } + for (int d = 0; d < 3; ++d) { + transfer[i * (sys_dim + 1) * max_particles + + n_send[i] * (sys_dim + 1) + d] = points.at(j)[d]; + } + transfer[i * (sys_dim + 1) * max_particles + + n_send[i] * (sys_dim + 1) + sys_dim] = points.at(j).getWeight(); + n_send[i]++; } - for (int d = 0; d < 3; ++d) { - transfer[cellId * (sys_dim + 1) * max_particles + - n_send[cellId] * (sys_dim + 1) + d] = p[d]; + } + } + + for(int i = allPoints->GetNumberOfPoints() - 1; i >= 0; --i) + { + if (sendTarget.at(i) < 0) + { + sendTarget.at(i) = 13; + } + if (sendTarget.at(i) != 13) + { + points.erase(points.begin() + i); + n_points--; + } + } + + /* + for (int j = 0; j < nRanks; ++j) + { + if (localRank == j) + { + int cnt = 0; + for (int i = 0; i < 27; ++i) + { + std::cout << n_send[i] << " "; + cnt += n_send[i]; } - transfer[cellId * (sys_dim + 1) * max_particles + - n_send[cellId] * (sys_dim + 1) + sys_dim] = p.getWeight(); - n_send[cellId]++; - points.erase(P); - --P; - --n_points; + MPI_Barrier(cart_comm); } + else + MPI_Barrier(cart_comm); } + */ for (int n = 0; n < 27; ++n) { + if (n == 13) continue; MPI_Isend(&n_send[n], 1, MPI_INT, neighbors.at(n), 1020, cart_comm, &request[n]); MPI_Irecv(&n_recv[n], 1, MPI_INT, neighbors.at(n), 1020, cart_comm, @@ -1797,6 +1692,7 @@ int main(int argc, char **argv) { MPI_Waitall(54, request, status); for (int n = 0; n < 27; ++n) { + if (n == 13) continue; MPI_Isend(&transfer[n * (sys_dim + 1) * max_particles], (sys_dim + 1) * n_send[n], MPI_DOUBLE, neighbors.at(n), 1030, cart_comm, &request[n]); @@ -1807,6 +1703,7 @@ int main(int argc, char **argv) { MPI_Waitall(54, request, status); for (int n = 0; n < 27; ++n) { + if (n == 13) continue; if (exists[n]) { for (int i = 0; i < n_recv[n]; ++i) { ALL::Point<double> p( @@ -1816,10 +1713,17 @@ int main(int argc, char **argv) { sys_dim]); points.push_back(p); ++n_points; + if (p[0] < 0.5 && p[1] < 0.5 && p[2] < 0.5) + { + std::cout << "Particle put to zero: " << localRank << " " << neighbors.at(n) << " " << i << " " << p << std::endl; + } } } } + //MPI_Barrier(cart_comm); + //MPI_Abort(cart_comm,-999); + #else if (localRank == 0) std::cout << "Currently no FORCEBASED test without VTK!" @@ -2182,8 +2086,10 @@ int main(int argc, char **argv) { #ifdef ALL_VTK_OUTPUT // if (localRank == 0) // std::cout << "creating vtk outlines output" << std::endl; + /* if (chosen_method != ALL::LB_t::FORCEBASED) lb_obj.printVTKoutlines(output_step); + */ // if (localRank == 0) // std::cout << "creating vtk vertices output" << std::endl; if (chosen_method == ALL::LB_t::FORCEBASED) @@ -2217,7 +2123,7 @@ int main(int argc, char **argv) { cart_comm); MPI_Allreduce(&n_local, &n_total, 1, MPI_DOUBLE, MPI_SUM, cart_comm); - avg_work = n_total / (double)n_ranks; + avg_work = n_total / (double)nRanks; MPI_Allreduce(&n_local, &n_min, 1, MPI_DOUBLE, MPI_MIN, cart_comm); MPI_Allreduce(&n_local, &n_max, 1, MPI_DOUBLE, MPI_MAX, cart_comm); d_min = n_min / avg_work; @@ -2264,7 +2170,7 @@ int main(int argc, char **argv) { } // output of borders / contents - for (int i = 0; i < n_ranks; ++i) { + for (int i = 0; i < nRanks; ++i) { if (localRank == i) { std::ofstream of; if (!weighted_points) @@ -2294,7 +2200,7 @@ int main(int argc, char **argv) { of << n_local << " "; of << std::endl; - if (i == n_ranks - 1) + if (i == nRanks - 1) of << std::endl; of.close(); MPI_Barrier(cart_comm); @@ -2334,7 +2240,7 @@ int main(int argc, char **argv) { 2.0 * width[1] * width[2] + 2.0 * width[0] * width[2]; - for (int r = 0; r < n_ranks; ++r) { + for (int r = 0; r < nRanks; ++r) { if (r == localRank) { std::ofstream of; of.open("domain_data.dat", std::ios::out | std::ios::app); @@ -2346,8 +2252,6 @@ int main(int argc, char **argv) { MPI_Barrier(cart_comm); } } - if (chosen_method == ALL::LB_t::FORCEBASED) - lb_obj.printVTKvertices(output_step); #endif output_step++; #ifdef ALL_VORONOI_ACTIVE @@ -2362,12 +2266,12 @@ int main(int argc, char **argv) { // create binary output (e.g. for HimeLB) // format: - // n_ranks integers (number of particles per domain) - // n_ranks integers (offset in file) + // nRanks integers (number of particles per domain) + // nRanks integers (offset in file) // <n_particles[0]> * (long + 3*double) double values (particle positions) // open file - int err; + int known_unused err; MPI_File outfile; err = MPI_File_open(MPI_COMM_WORLD, "particles_output.bin", @@ -2387,18 +2291,20 @@ int main(int argc, char **argv) { long block_size = sizeof(long); // + 3 * sizeof(double); offset *= block_size; - offset += n_ranks * (sizeof(int) + sizeof(long)); + offset += nRanks * (sizeof(int) + sizeof(long)); MPI_File_write_at( - outfile, (MPI_Offset)(n_ranks * sizeof(int) + localRank * sizeof(long)), + outfile, (MPI_Offset)(nRanks * sizeof(int) + localRank * sizeof(long)), &offset, 1, MPI_LONG, MPI_STATUS_IGNORE); for (int n = 0; n < n_points; ++n) { long id = points.at(n).get_id(); + /* double loc[3]; loc[0] = points.at(n)[0]; loc[1] = points.at(n)[1]; loc[2] = points.at(n)[2]; + */ MPI_File_write_at(outfile, (MPI_Offset)((offset + n * block_size)), &id, 1, MPI_LONG, MPI_STATUS_IGNORE); /* @@ -2415,9 +2321,9 @@ int main(int argc, char **argv) { /* // output of borders / contents - if (n_ranks < 216) + if (nRanks < 216) { - for (int i = 0; i < n_ranks; ++i) + for (int i = 0; i < nRanks; ++i) { if (localRank == i) { @@ -2438,7 +2344,7 @@ int main(int argc, char **argv) { of << " " << n_local << " "; of << std::endl; - if (i == n_ranks - 1) of << std::endl; + if (i == nRanks - 1) of << std::endl; of.close(); MPI_Barrier(cart_comm); } @@ -2460,6 +2366,7 @@ int main(int argc, char **argv) { MPI_Finalize(); return EXIT_SUCCESS; } catch (ALL::CustomException &e) { + std::cout << "Error caught: " << std::endl; std::cout << e.what() << std::endl; return EXIT_FAILURE; } diff --git a/include/ALL.hpp b/include/ALL.hpp index f0c685ecc23c53da38e19791f3df8bd30bb07d67..b6dbcc25d1ebe360e540318aca9f6eac97a7f0f3 100644 --- a/include/ALL.hpp +++ b/include/ALL.hpp @@ -66,6 +66,9 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include <vtkVoxel.h> #include <vtkXMLPUnstructuredGridWriter.h> #include <vtkXMLUnstructuredGridWriter.h> +#ifdef VTK_CELL_ARRAY_V2 +#include <vtkNew.h> +#endif #endif namespace ALL { @@ -542,14 +545,19 @@ public: // define rank array (length = 4, x,y,z, rank) int rank = 0; MPI_Comm_rank(communicator, &rank); + auto coords = vtkSmartPointer<vtkFloatArray>::New(); + coords->SetNumberOfComponents(3); + coords->SetNumberOfTuples(1); + coords->SetName("coords"); + coords->SetValue(0, procGridLoc.at(0)); + coords->SetValue(1, procGridLoc.at(1)); + coords->SetValue(2, procGridLoc.at(2)); + auto rnk = vtkSmartPointer<vtkFloatArray>::New(); - rnk->SetNumberOfComponents(4); + rnk->SetNumberOfComponents(1); rnk->SetNumberOfTuples(1); - rnk->SetName("rank"); - rnk->SetValue(0, procGridLoc.at(0)); - rnk->SetValue(1, procGridLoc.at(1)); - rnk->SetValue(2, procGridLoc.at(2)); - rnk->SetValue(3, rank); + rnk->SetName("MPI rank"); + rnk->SetValue(0, rank); // define tag array (length = 1) auto tag = vtkSmartPointer<vtkIntArray>::New(); @@ -605,6 +613,7 @@ public: unstructuredGrid->GetCellData()->AddArray(work); unstructuredGrid->GetCellData()->AddArray(expanse); unstructuredGrid->GetCellData()->AddArray(rnk); + unstructuredGrid->GetCellData()->AddArray(coords); unstructuredGrid->GetCellData()->AddArray(tag); createDirectory("vtk_outline"); @@ -662,11 +671,14 @@ public: local_vertices[nVertices * balancer->getDimension()] = (T)balancer->getWork().at(0); + /* T *global_vertices; if (local_rank == 0) { global_vertices = new T[n_ranks * (nVertices * balancer->getDimension() + 1)]; } + */ + T global_vertices[n_ranks * (nVertices * balancer->getDimension() + 1)]; // collect all works and vertices on a single process MPI_Gather(local_vertices, nVertices * balancer->getDimension() + 1, @@ -676,7 +688,12 @@ public: if (local_rank == 0) { auto vtkpoints = vtkSmartPointer<vtkPoints>::New(); +#ifdef VTK_CELL_ARRAY_V2 + vtkNew<vtkUnstructuredGrid> unstructuredGrid; + unstructuredGrid->Allocate(n_ranks + 10); +#else auto unstructuredGrid = vtkSmartPointer<vtkUnstructuredGrid>::New(); +#endif // enter vertices into unstructured grid for (int i = 0; i < n_ranks; ++i) { @@ -702,7 +719,29 @@ public: cell->SetNumberOfTuples(n_ranks); cell->SetName("cell id"); + for (int n = 0; n < n_ranks; ++n) { + +#ifdef VTK_CELL_ARRAY_V2 + // define grid points, i.e. vertices of local domain + vtkIdType pointIds[8] = {8 * n + 0, 8 * n + 1, 8 * n + 2, 8 * n + 3, + 8 * n + 4, 8 * n + 5, 8 * n + 6, 8 * n + 7}; + + vtkIdType faces[48] = { 3, 8 * n + 0, 8 * n + 2, 8 * n + 1, + 3, 8 * n + 1, 8 * n + 2, 8 * n + 3, + 3, 8 * n + 0, 8 * n + 4, 8 * n + 2, + 3, 8 * n + 2, 8 * n + 4, 8 * n + 6, + 3, 8 * n + 2, 8 * n + 6, 8 * n + 3, + 3, 8 * n + 3, 8 * n + 6, 8 * n + 7, + 3, 8 * n + 1, 8 * n + 5, 8 * n + 3, + 3, 8 * n + 3, 8 * n + 5, 8 * n + 7, + 3, 8 * n + 0, 8 * n + 4, 8 * n + 1, + 3, 8 * n + 1, 8 * n + 4, 8 * n + 5, + 3, 8 * n + 4, 8 * n + 6, 8 * n + 5, + 3, 8 * n + 5, 8 * n + 6, 8 * n + 7}; + + unstructuredGrid->InsertNextCell(VTK_POLYHEDRON, 8, pointIds, 12, faces); +#else // define grid points, i.e. vertices of local domain vtkIdType pointIds[8] = {8 * n + 0, 8 * n + 1, 8 * n + 2, 8 * n + 3, 8 * n + 4, 8 * n + 5, 8 * n + 6, 8 * n + 7}; @@ -742,6 +781,7 @@ public: unstructuredGrid->InsertNextCell(VTK_POLYHEDRON, 8, pointIds, 12, faces->GetPointer()); +#endif work->SetValue( n, global_vertices[n * (nVertices * balancer->getDimension() + 1) + @@ -762,7 +802,7 @@ public: // writer->SetDataModeToBinary(); writer->Write(); - delete[] global_vertices; + //delete[] global_vertices; } } #endif diff --git a/include/ALL_ForceBased.hpp b/include/ALL_ForceBased.hpp index 6ebbc3bea69a610b739f9ad0d81a8d4cceba667a..890b092af122728e97db0e1c50a80092d0b4c311 100644 --- a/include/ALL_ForceBased.hpp +++ b/include/ALL_ForceBased.hpp @@ -59,6 +59,8 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. gamma: correction factor to control the speed of the vertex shift + Information: + Currently only implemented for 3D systems */ #include "ALL_CustomExceptions.hpp" @@ -114,7 +116,7 @@ public: /// method to set specific data structures (unused for tensor grid method) /// @param[in] data pointer to the data structure - virtual void setAdditionalData(known_unused const void *data) override {} + virtual void setAdditionalData(known_unused const void *data) override; private: // type for MPI communication @@ -138,11 +140,11 @@ private: // number of vertices (since it is not equal for all domains) int n_vertices; - // main dimension (correction in staggered style) - int main_dim; - // secondary dimensions (2D force shift) int sec_dim[2]; + + // center of particles + Point<T> cop; }; template <class T, class W> ForceBased_LB<T, W>::~ForceBased_LB() {} @@ -153,9 +155,16 @@ std::vector<int> &ForceBased_LB<T, W>::getNeighbors() { return neighbors; } +template <class T, class W> void ForceBased_LB<T, W>::setAdditionalData(void const *data) +{ + cop.setDimension(3); + cop[0] = (*((Point<T>*)data))[0]; + cop[1] = (*((Point<T>*)data))[1]; + cop[2] = (*((Point<T>*)data))[2]; +} + template <class T, class W> void ForceBased_LB<T, W>::setup() { n_vertices = this->vertices.size(); - vertex_neighbors.resize(n_vertices * 8); // determine correct MPI data type for template T if (std::is_same<T, double>::value) @@ -208,46 +217,19 @@ template <class T, class W> void ForceBased_LB<T, W>::setup() { // get the local rank from the MPI communicator MPI_Cart_rank(this->globalComm, this->local_coords.data(), &this->localRank); - // groups required for new communicators - MPI_Group known_unused groups[n_vertices]; - // arrays of processes belonging to group - int known_unused processes[n_vertices][n_vertices]; - - // shifted local coordinates to find neighboring processes - int known_unused shifted_coords[this->dimension]; - - // get main and secondary dimensions - if (this->global_dims.at(2) >= this->global_dims.at(1)) { - if (this->global_dims.at(2) >= this->global_dims.at(0)) { - main_dim = 2; - sec_dim[0] = 0; - sec_dim[1] = 1; - } else { - main_dim = 0; - sec_dim[0] = 1; - sec_dim[1] = 2; - } - } else { - if (this->global_dims.at(1) >= this->global_dims.at(0)) { - main_dim = 1; - sec_dim[0] = 0; - sec_dim[1] = 2; - } else { - main_dim = 0; - sec_dim[0] = 1; - sec_dim[1] = 2; - } - } - - if (this->localRank == 0) - std::cout << "DEBUG: main_dim: " << main_dim << std::endl; - - // create main communicator - MPI_Comm_split(this->globalComm, this->local_coords.at(main_dim), - this->local_coords.at(sec_dim[0]) + - this->local_coords.at(sec_dim[1]) * - this->global_dims.at(sec_dim[0]), - &main_communicator); + // find list of neighbors (does not change) + neighbors.resize(27); + int n = 0; + for (int z = -1; z <= 1; ++z) + for (int y = -1; y <= 1; ++y) + for (int x = -1; x <= 1; ++x) + { + std::vector<int> coords({this->local_coords.at(0)+x, + this->local_coords.at(1)+y, + this->local_coords.at(2)+z }); + MPI_Cart_rank(this->globalComm, coords.data(), &neighbors.at(n)); + ++n; + } /* @@ -273,108 +255,7 @@ template <class T, class W> void ForceBased_LB<T, W>::setup() { // 0 - - - 1 */ - -#ifdef ALL_DEBUG_ENABLED - MPI_Barrier(this->globalComm); - if (this->localRank == 0) - std::cout << "ALL::ForceBased_LB<T,W>::setup() preparing communicators..." - << std::endl; -#endif - std::vector<int> dim_vert(this->global_dims); - - MPI_Comm known_unused tmp_comm; - int known_unused own_vertex; - -#ifdef ALL_DEBUG_ENABLED - MPI_Barrier(this->globalComm); - if (this->localRank == 0) - std::cout << "ALL::ForceBased_LB<T,W>::setup() computing communicators..." - << std::endl; - std::cout << "DEBUG: " - << " rank: " << this->localRank << " dim_vert: " << dim_vert.at(0) - << " " << dim_vert.at(1) << " " << dim_vert.at(2) - << " size(local_coords): " << this->local_coords.size() << " " - << " size(global_dims): " << this->global_dims.size() << std::endl; -#endif - for (int iz = 0; iz < dim_vert.at(2); ++iz) { - for (int iy = 0; iy < dim_vert.at(1); ++iy) { - for (int ix = 0; ix < dim_vert.at(0); ++ix) { - bool affected[8]; - for (auto &a : affected) - a = false; - int v_neighbors[8]; - for (auto &vn : vertex_neighbors) - vn = -1; - if (ix == ((this->local_coords.at(0) + 0) % dim_vert.at(0)) && - iy == ((this->local_coords.at(1) + 0) % dim_vert.at(1)) && - iz == ((this->local_coords.at(2) + 0) % dim_vert.at(2))) { - affected[0] = true; - v_neighbors[0] = this->localRank; - } - if (ix == ((this->local_coords.at(0) + 1) % dim_vert.at(0)) && - iy == ((this->local_coords.at(1) + 0) % dim_vert.at(1)) && - iz == ((this->local_coords.at(2) + 0) % dim_vert.at(2))) { - affected[1] = true; - v_neighbors[1] = this->localRank; - } - if (ix == ((this->local_coords.at(0) + 0) % dim_vert.at(0)) && - iy == ((this->local_coords.at(1) + 1) % dim_vert.at(1)) && - iz == ((this->local_coords.at(2) + 0) % dim_vert.at(2))) { - affected[2] = true; - v_neighbors[2] = this->localRank; - } - if (ix == ((this->local_coords.at(0) + 1) % dim_vert.at(0)) && - iy == ((this->local_coords.at(1) + 1) % dim_vert.at(1)) && - iz == ((this->local_coords.at(2) + 0) % dim_vert.at(2))) { - affected[3] = true; - v_neighbors[3] = this->localRank; - } - if (ix == ((this->local_coords.at(0) + 0) % dim_vert.at(0)) && - iy == ((this->local_coords.at(1) + 0) % dim_vert.at(1)) && - iz == ((this->local_coords.at(2) + 1) % dim_vert.at(2))) { - affected[4] = true; - v_neighbors[4] = this->localRank; - } - if (ix == ((this->local_coords.at(0) + 1) % dim_vert.at(0)) && - iy == ((this->local_coords.at(1) + 0) % dim_vert.at(1)) && - iz == ((this->local_coords.at(2) + 1) % dim_vert.at(2))) { - affected[5] = true; - v_neighbors[5] = this->localRank; - } - if (ix == ((this->local_coords.at(0) + 0) % dim_vert.at(0)) && - iy == ((this->local_coords.at(1) + 1) % dim_vert.at(1)) && - iz == ((this->local_coords.at(2) + 1) % dim_vert.at(2))) { - affected[6] = true; - v_neighbors[6] = this->localRank; - } - if (ix == ((this->local_coords.at(0) + 1) % dim_vert.at(0)) && - iy == ((this->local_coords.at(1) + 1) % dim_vert.at(1)) && - iz == ((this->local_coords.at(2) + 1) % dim_vert.at(2))) { - affected[7] = true; - v_neighbors[7] = this->localRank; - } - - MPI_Allreduce(MPI_IN_PLACE, v_neighbors, 8, MPI_INT, MPI_MAX, - this->globalComm); - - for (int v = 0; v < n_vertices; ++v) { - if (affected[v]) { - for (int n = 0; n < 8; ++n) { - vertex_neighbors.at(8 * v + n) = v_neighbors[n]; - } - } - } - } - } -// ToDo: check if vertices correct -#ifdef ALL_DEBUG_ENABLED - MPI_Barrier(this->globalComm); - if (this->localRank == 0) - std::cout << "ALL::ForceBased_LB<T,W>::setup() finished computing " - "communicators..." - << std::endl; -#endif - } + } // TODO: periodic boundary conditions (would require size of the system) @@ -382,410 +263,250 @@ template <class T, class W> void ForceBased_LB<T, W>::setup() { template <class T, class W> void ForceBased_LB<T, W>::balance(int /*step*/) { + + // store current vertices + this->prevVertices = this->vertices; - this->prevVertices = this->vertices; + // compute geometric center of domain + Point<T> geoCenter({0.0,0.0,0.0}); - // geometrical center and work of each neighbor for each vertex - // work is cast to vertex data type, since in the end a shift - // of the vertex is computed - T vertex_info[n_vertices * n_vertices * (this->dimension + 2)]; - T center[this->dimension]; + for (auto v : this->vertices) + { + geoCenter = geoCenter + v; + } + geoCenter = geoCenter * (1.0 / (T)n_vertices); + + std::vector<T> locInfo( { cop[0], + cop[1], + cop[2], + (T)this->work.at(0)}); + //std::vector<T> locInfo( { geoCenter[0], + // geoCenter[1], + // geoCenter[2], + // (T)this->work.at(0)}); + + // list of neighbors information is provided to and the shift is + // received from + std::vector<int> infoNeig({0,1,3,4,9,10,12,13}); + + // list of neighbors information is received from to compute + // local shift and shift is send to (mirrored to list above) + std::vector<int> shiftNeig({26,25,23,22,17,16,14,13}); + + // vector to store data required to store necessary data + std::vector<T> shiftData(32); + + // vector to store shifts for each of the vertices + std::vector<T> shifts(24); + + // vector to store MPI requests + std::vector<int> recvReq(8); + std::vector<int> sendReq(8); + + // vector to store MPI status objects + std::vector<MPI_Status> recvStat(8); + std::vector<MPI_Status> sendStat(8); + + // exchange information + for (int n = 0; n < 8; ++n) + { + // receives + MPI_Irecv( shiftData.data()+4*n, + 4, + MPIDataTypeT, + neighbors.at(shiftNeig.at(n)), + 1012, + this->globalComm, + recvReq.data()+n ); + + // sends + MPI_Isend( locInfo.data(), + 4, + MPIDataTypeT, + neighbors.at(infoNeig.at(n)), + 1012, + this->globalComm, + sendReq.data()+n ); - // local geometric center - T known_unused local_info[(this->dimension + 2) * n_vertices]; + } - for (int i = 0; i < n_vertices * n_vertices * (this->dimension + 2); ++i) - vertex_info[i] = -1; + MPI_Waitall(8,sendReq.data(),sendStat.data()); + MPI_Waitall(8,recvReq.data(),recvStat.data()); - for (int d = 0; d < (this->dimension + 2) * n_vertices; ++d) - local_info[d] = (T)0; + // compute shift + Point<T> shift(3); + shift[0] = shift[1] = shift[2] = 0.0; - for (int d = 0; d < this->dimension; ++d) - center[d] = (T)0; + auto cmp = [=](std::pair<T,int> a, std::pair<T,int> b) + { + return a.first > b.first; + }; - // compute geometric center - for (int v = 0; v < n_vertices; ++v) { - for (int d = 0; d < this->dimension; ++d) { - center[d] += ((this->prevVertices.at(v))[d] / (T)n_vertices); - } - local_info[v * (this->dimension + 2) + this->dimension] = - (T)this->work.at(0); - } - for (int v = 0; v < n_vertices; ++v) { - for (int d = 0; d < this->dimension; ++d) { - // vector pointing to center - local_info[v * (this->dimension + 2) + d] = - center[d] - (this->prevVertices.at(v))[d]; - } - } +/* - // compute for each vertex the maximum movement - switch (main_dim) { - case 0: - local_info[0 * (this->dimension + 2) + this->dimension + 1] = - 0.5 * std::min(this->prevVertices.at(0).d(this->prevVertices.at(2)), - this->prevVertices.at(0).d(this->prevVertices.at(4))); - local_info[1 * (this->dimension + 2) + this->dimension + 1] = - 0.5 * std::min(this->prevVertices.at(1).d(this->prevVertices.at(3)), - this->prevVertices.at(1).d(this->prevVertices.at(5))); - local_info[2 * (this->dimension + 2) + this->dimension + 1] = - 0.5 * std::min(this->prevVertices.at(2).d(this->prevVertices.at(0)), - this->prevVertices.at(2).d(this->prevVertices.at(6))); - local_info[3 * (this->dimension + 2) + this->dimension + 1] = - 0.5 * std::min(this->prevVertices.at(3).d(this->prevVertices.at(1)), - this->prevVertices.at(3).d(this->prevVertices.at(7))); - local_info[4 * (this->dimension + 2) + this->dimension + 1] = - 0.5 * std::min(this->prevVertices.at(4).d(this->prevVertices.at(0)), - this->prevVertices.at(4).d(this->prevVertices.at(6))); - local_info[5 * (this->dimension + 2) + this->dimension + 1] = - 0.5 * std::min(this->prevVertices.at(5).d(this->prevVertices.at(1)), - this->prevVertices.at(5).d(this->prevVertices.at(7))); - local_info[6 * (this->dimension + 2) + this->dimension + 1] = - 0.5 * std::min(this->prevVertices.at(6).d(this->prevVertices.at(2)), - this->prevVertices.at(6).d(this->prevVertices.at(4))); - local_info[7 * (this->dimension + 2) + this->dimension + 1] = - 0.5 * std::min(this->prevVertices.at(7).d(this->prevVertices.at(3)), - this->prevVertices.at(7).d(this->prevVertices.at(5))); - break; - case 1: - local_info[0 * (this->dimension + 2) + this->dimension + 1] = - 0.5 * std::min(this->prevVertices.at(0).d(this->prevVertices.at(1)), - this->prevVertices.at(0).d(this->prevVertices.at(4))); - local_info[1 * (this->dimension + 2) + this->dimension + 1] = - 0.5 * std::min(this->prevVertices.at(1).d(this->prevVertices.at(0)), - this->prevVertices.at(1).d(this->prevVertices.at(5))); - local_info[2 * (this->dimension + 2) + this->dimension + 1] = - 0.5 * std::min(this->prevVertices.at(2).d(this->prevVertices.at(3)), - this->prevVertices.at(2).d(this->prevVertices.at(6))); - local_info[3 * (this->dimension + 2) + this->dimension + 1] = - 0.5 * std::min(this->prevVertices.at(3).d(this->prevVertices.at(2)), - this->prevVertices.at(3).d(this->prevVertices.at(7))); - local_info[4 * (this->dimension + 2) + this->dimension + 1] = - 0.5 * std::min(this->prevVertices.at(4).d(this->prevVertices.at(0)), - this->prevVertices.at(4).d(this->prevVertices.at(5))); - local_info[5 * (this->dimension + 2) + this->dimension + 1] = - 0.5 * std::min(this->prevVertices.at(5).d(this->prevVertices.at(1)), - this->prevVertices.at(5).d(this->prevVertices.at(4))); - local_info[6 * (this->dimension + 2) + this->dimension + 1] = - 0.5 * std::min(this->prevVertices.at(6).d(this->prevVertices.at(2)), - this->prevVertices.at(6).d(this->prevVertices.at(7))); - local_info[7 * (this->dimension + 2) + this->dimension + 1] = - 0.5 * std::min(this->prevVertices.at(7).d(this->prevVertices.at(3)), - this->prevVertices.at(7).d(this->prevVertices.at(6))); - break; - case 2: - local_info[0 * (this->dimension + 2) + this->dimension + 1] = - 0.5 * std::min(this->prevVertices.at(0).d(this->prevVertices.at(1)), - this->prevVertices.at(0).d(this->prevVertices.at(2))); - local_info[1 * (this->dimension + 2) + this->dimension + 1] = - 0.5 * std::min(this->prevVertices.at(1).d(this->prevVertices.at(0)), - this->prevVertices.at(1).d(this->prevVertices.at(3))); - local_info[2 * (this->dimension + 2) + this->dimension + 1] = - 0.5 * std::min(this->prevVertices.at(2).d(this->prevVertices.at(0)), - this->prevVertices.at(2).d(this->prevVertices.at(3))); - local_info[3 * (this->dimension + 2) + this->dimension + 1] = - 0.5 * std::min(this->prevVertices.at(3).d(this->prevVertices.at(1)), - this->prevVertices.at(3).d(this->prevVertices.at(2))); - local_info[4 * (this->dimension + 2) + this->dimension + 1] = - 0.5 * std::min(this->prevVertices.at(4).d(this->prevVertices.at(5)), - this->prevVertices.at(4).d(this->prevVertices.at(6))); - local_info[5 * (this->dimension + 2) + this->dimension + 1] = - 0.5 * std::min(this->prevVertices.at(5).d(this->prevVertices.at(4)), - this->prevVertices.at(5).d(this->prevVertices.at(7))); - local_info[6 * (this->dimension + 2) + this->dimension + 1] = - 0.5 * std::min(this->prevVertices.at(6).d(this->prevVertices.at(4)), - this->prevVertices.at(6).d(this->prevVertices.at(7))); - local_info[7 * (this->dimension + 2) + this->dimension + 1] = - 0.5 * std::min(this->prevVertices.at(7).d(this->prevVertices.at(5)), - this->prevVertices.at(7).d(this->prevVertices.at(6))); - break; - default: - throw InternalErrorException( - __FILE__, __func__, __LINE__, - "Invalid main dimension provided (numerical value not 0, 1 or 2)."); - break; - } - // exchange information with all vertex neighbors - MPI_Request known_unused request[n_vertices]; - MPI_Status known_unused status[n_vertices]; + // compute global influence on the shift + int nRanks; + MPI_Comm_size(this->globalComm, &nRanks); + std::vector<T[4]> globalData(nRanks); - // compute new position for vertex 7 (if not periodic) - T known_unused total_work = (T)0; - T known_unused shift_vectors[this->dimension * n_vertices]; + Point<T> globalTarget(3); - for (int v = 0; v < n_vertices; ++v) { - for (int d = 0; d < this->dimension; ++d) { - shift_vectors[v * this->dimension + d] = (T)0; - } - } + MPI_Allgather(locInfo.data(),4, MPIDataTypeT, globalData.data(), 4, MPIDataTypeT, this->globalComm); + std::vector<std::pair<T,int>> globalTargets(nRanks); - // TODO: - // 1.) collect work in main_dim communicators - // 2.) exchange work and extension in main_dim to neighbors in main_dim - // 3.) correct extension in main_dim - // 4.) do force-based correction in secondary dimensions - // 5.) find new neighbors in main_dim (hardest part, probably cell based) - - int main_up; - int main_down; - - // as each process computes the correction of the upper layer of vertices - // the source is 'main_up' and the target 'main_down' - MPI_Cart_shift(this->globalComm, main_dim, 1, &main_down, &main_up); - - W local_work = this->work.at(0); - W remote_work; - - // reduce work from local layer - MPI_Allreduce(MPI_IN_PLACE, &local_work, 1, MPIDataTypeW, MPI_SUM, - main_communicator); - - // exchange local work with neighbor in main direction - MPI_Status state; - MPI_Sendrecv(&local_work, 1, MPIDataTypeW, main_down, 1010, &remote_work, 1, - MPIDataTypeW, main_up, 1010, this->globalComm, &state); - - // transfer width of domains as well - T remote_width; - T local_width; - switch (main_dim) { - case 0: - local_width = this->prevVertices.at(1)[0] - this->prevVertices.at(0)[0]; - break; - case 1: - local_width = this->prevVertices.at(2)[1] - this->prevVertices.at(0)[1]; - break; - case 2: - local_width = this->prevVertices.at(4)[2] - this->prevVertices.at(0)[2]; - break; - default: - throw InternalErrorException( - __FILE__, __func__, __LINE__, - "Invalid main dimension provided (numerical value not 0, 1 or 2)."); - break; - } - MPI_Sendrecv(&local_width, 1, MPIDataTypeT, main_down, 1010, &remote_width, 1, - MPIDataTypeT, main_up, 1010, this->globalComm, &state); - T total_width = local_width + remote_width; - T max_main_shift = std::min(0.49 * std::min(local_width, remote_width), - std::min(local_width, remote_width) - 1.0); - - // compute shift - T local_shift = (remote_work - local_work) / (remote_work + local_work) / - this->gamma / 2.0 * total_width; - local_shift = (std::abs(local_shift) > max_main_shift) - ? std::abs(local_shift) * max_main_shift / local_shift - : local_shift; - T remote_shift = 0; - - // TODO: fix -> send to upper neighbor, receive from lower neighbor!! - // sendrecv not correct! - - // transfer shift back - MPI_Sendrecv(&local_shift, 1, MPIDataTypeT, main_up, 2020, &remote_shift, 1, - MPIDataTypeT, main_down, 2020, this->globalComm, &state); - - // apply shift in main direction - switch (main_dim) { - case 0: - if (this->local_coords.at(0) > 0) { - this->vertices.at(0)[0] = this->prevVertices.at(0)[0] + remote_shift; - this->vertices.at(2)[0] = this->prevVertices.at(2)[0] + remote_shift; - this->vertices.at(4)[0] = this->prevVertices.at(4)[0] + remote_shift; - this->vertices.at(6)[0] = this->prevVertices.at(6)[0] + remote_shift; + for (int i = 0; i < nRanks; ++i) + { + globalTargets.at(i).first = globalData.at(i)[3]; + globalTargets.at(i).second = i; + globalTarget = globalTarget + Point<T>(3, globalData.at(i)); } - if (this->local_coords.at(0) < this->global_dims.at(0) - 1) { - this->vertices.at(1)[0] = this->prevVertices.at(1)[0] + local_shift; - this->vertices.at(3)[0] = this->prevVertices.at(3)[0] + local_shift; - this->vertices.at(5)[0] = this->prevVertices.at(5)[0] + local_shift; - this->vertices.at(7)[0] = this->prevVertices.at(7)[0] + local_shift; + globalTarget = 1.0 / (T) nRanks * globalTarget; + + std::sort(globalTargets.begin(), globalTargets.end(), cmp); + + int n = 0; + while (std::pow(2,n) <= 0.125 * nRanks) + { + for (int j = (int)std::pow(2,n)-1; j < (int)std::pow(2,n); ++j) + for (int i = 0; i < 3; ++i) + shift[i] += (0.25 / ( this->gamma * (T)std::pow(2,n) ) ) * (globalData.at(globalTargets.at(j).second)[i] - this->vertices.at(7)[i]) / 8.0; + ++n; } - break; - case 1: - if (this->local_coords.at(1) > 0) { - this->vertices.at(0)[1] = this->prevVertices.at(0)[1] + remote_shift; - this->vertices.at(1)[1] = this->prevVertices.at(1)[1] + remote_shift; - this->vertices.at(4)[1] = this->prevVertices.at(4)[1] + remote_shift; - this->vertices.at(5)[1] = this->prevVertices.at(5)[1] + remote_shift; + + for (int i = 0; i < 3; ++i) + shift[i] += (0.25 / (this->gamma * nRanks)) * (globalTarget[i] - this->vertices.at(7)[i]); + +*/ + + // compute the local influence on the shift + + // compute average work around vertex + T workAvg; + workAvg = 0.0; + for (int n = 0; n < 8; ++n) + workAvg += shiftData.at(4*n+3) / 8.0; + + // sort work from highest to lowest + std::vector<std::pair<T,int>> localTargets(8); + + for (int i = 0; i < 8; ++i) + { + localTargets.at(i).first = shiftData.at(4*i+3); + localTargets.at(i).second = i; } - if (this->local_coords.at(1) < this->global_dims.at(1) - 1) { - this->vertices.at(2)[1] = this->prevVertices.at(2)[1] + local_shift; - this->vertices.at(3)[1] = this->prevVertices.at(3)[1] + local_shift; - this->vertices.at(6)[1] = this->prevVertices.at(6)[1] + local_shift; - this->vertices.at(7)[1] = this->prevVertices.at(7)[1] + local_shift; + + std::sort(localTargets.begin(), localTargets.end(), cmp); + + std::vector<T> diffFactor(8); + + + int nInfl = 0; + for (int i = 0; i < 8; ++i) + if (localTargets.at(i).first - workAvg > 1e-12) + ++nInfl; + + for (int i = 0; i < 8; ++i) + { + + //if (shiftData.at(4*i+3) > workAvg) + // diffFactor.at(i) = (shiftData.at(4*i+3) - workAvg)/(localTargets.at(0).first - workAvg); + //else + // diffFactor.at(i) = 0.05; + + if (localTargets.at(i).first - workAvg > 1e-12) + diffFactor.at(i) = 1.0 / (double)nInfl; + else + diffFactor.at(i) = 0.0; } - break; - case 2: - if (this->local_coords.at(2) > 0) { - this->vertices.at(0)[2] = this->prevVertices.at(0)[2] + remote_shift; - this->vertices.at(1)[2] = this->prevVertices.at(1)[2] + remote_shift; - this->vertices.at(2)[2] = this->prevVertices.at(2)[2] + remote_shift; - this->vertices.at(3)[2] = this->prevVertices.at(3)[2] + remote_shift; + + + if (this->localRank == 0) + { + std::cout << "localTargets: "; + for (int i = 0; i < 8; ++i) + std::cout << "[ " << localTargets.at(i).first << " | " << localTargets.at(i).second << " | " << diffFactor.at(localTargets.at(i).second) << " ] "; + std::cout << std::endl; } - if (this->local_coords.at(2) < this->global_dims.at(2) - 1) { - this->vertices.at(4)[2] = this->prevVertices.at(4)[2] + local_shift; - this->vertices.at(5)[2] = this->prevVertices.at(5)[2] + local_shift; - this->vertices.at(6)[2] = this->prevVertices.at(6)[2] + local_shift; - this->vertices.at(7)[2] = this->prevVertices.at(7)[2] + local_shift; + + + for (int n = 0; n < 8; ++n) + { + if (std::abs(shiftData.at(4*n+3) + locInfo.at(3)) > 1e-12) + { + // compute direction vector from vertex to geometric + // center of neighboring domain + Point<T> domCenter(3, shiftData.data()+4*n); + + // normalize vector to insure that the domains do not become + // too deformed after shift + Point<T> direction(3); + direction = (domCenter - this->vertices.at(7)) * 0.5; + + T scalContrib = ( ( shiftData.at(4*n+3) - workAvg ) + * diffFactor.at(n) + / workAvg + / this->gamma ); + + //if (scalContrib < 1e-12) scalContrib = 0.0; + + // compute contribution to shift + Point<T> contrib = direction * scalContrib; + + + // add contribution to shift + shift = shift + contrib; + } } - break; - default: - throw InternalErrorException( - __FILE__, __func__, __LINE__, - "Invalid main dimension provided (numerical value not 0, 1 or 2)."); - break; - } - if (this->localRank <= 1) { - std::cout << "DEBUG (main-dim shift): " << this->localRank << " " - << remote_work << " " << local_work << " " << total_width << " " - << local_shift << " " << remote_shift - << " 0: " << this->prevVertices.at(0)[2] - << " 0: " << this->vertices.at(0)[2] - << " 4: " << this->prevVertices.at(4)[2] - << " 4: " << this->vertices.at(4)[2] << " " << std::endl; - } - int last_vertex = (n_vertices - 1) * n_vertices * (this->dimension + 2); - int vertex_offset = this->dimension + 2; - - // compute max shift - T max_shift = std::max( - 0.49 * vertex_info[last_vertex + 0 * vertex_offset + this->dimension + 1], - 1e-6); - for (int v = 1; v < n_vertices; ++v) { - max_shift = std::min( - max_shift, - 0.49 * - vertex_info[last_vertex + v * vertex_offset + this->dimension + 1]); - } - // average work of neighboring processes for last vertex - T avg_work = 0.0; - for (int v = 0; v < n_vertices; ++v) { - avg_work += vertex_info[last_vertex + v * vertex_offset + this->dimension]; - } - avg_work /= (T)n_vertices; - - // compute shift vector - std::vector<T> vertex_shift(n_vertices * this->dimension, (T)0.0); - for (auto &sv : vertex_shift) - sv = (T)0.0; - int shift_offset = (n_vertices - 1) * this->dimension; - - for (int v = 0; v < n_vertices; ++v) { - for (int d = 0; d < this->dimension; ++d) { - if (this->localRank == 0) - std::cout - << "DEBUG (shift x): " << v << ", " << d << " " - << (vertex_info[last_vertex + v * vertex_offset + this->dimension] - - avg_work) * - ((vertex_info[last_vertex + v * vertex_offset + - this->dimension] > avg_work) - ? 1.0 - : -1.0) / - this->gamma * - (vertex_info[last_vertex + v * vertex_offset + d] - - this->prevVertices.at(v)[d]) - << " => " << vertex_shift.at(shift_offset + d) << - - " max: " << max_shift - << " " - " avg_work: " - << avg_work - << " " - " work: " - << vertex_info[last_vertex + v * vertex_offset + this->dimension] - << " " - " 1/0: " - << ((vertex_info[last_vertex + v * vertex_offset + - this->dimension] > avg_work) - ? 1.0 - : -1.0) - - << std::endl; - vertex_shift.at(shift_offset + d) += - (vertex_info[last_vertex + v * vertex_offset + this->dimension] - - avg_work) * - ((vertex_info[last_vertex + v * vertex_offset + this->dimension] > - avg_work) - ? 1.0 - : -1.0) / - this->gamma * - (vertex_info[last_vertex + v * vertex_offset + d] - - this->prevVertices.at(v)[d]); + // prevent shifts outside of system box and normalize (with number of + // neighboring domains for the vertex) + for (int d = 0; d < 3; ++d) + { + if (this->local_coords.at(d) == this->global_dims.at(d) - 1) + shift[d] = 0.0; + else + shift[d] = shift[d]; } - } - Point<T> shift_vector(3); - shift_vector[0] = vertex_shift.at(shift_offset); - shift_vector[1] = vertex_shift.at(shift_offset + 1); - shift_vector[2] = vertex_shift.at(shift_offset + 2); - T shift_length = shift_vector.norm(); - if (this->localRank == 0) - std::cout << "DEBUG (shift length): " << shift_length - << " shift vector: " << shift_vector[0] << " " << shift_vector[1] - << " " << shift_vector[2] << " " << std::endl; - - // apply correct length, if the shift vector is too large - if (shift_length > max_shift) { - for (int d = 0; d < this->dimension; ++d) { - vertex_shift.at(shift_offset + d) *= max_shift / shift_length; - } - } + std::vector<T> commShift( + {shift[0], + shift[1], + shift[2]} + ); + + // send computed shift to neighbors + for (int n = 0; n < 8; ++n) + { + // receives + MPI_Irecv( shifts.data()+3*n, + 3, + MPIDataTypeT, + neighbors.at(infoNeig.at(n)), + 2012, + this->globalComm, + recvReq.data()+n ); + + // sends + MPI_Isend( commShift.data(), + 3, + MPIDataTypeT, + neighbors.at(shiftNeig.at(n)), + 2012, + this->globalComm, + sendReq.data()+n ); - if (this->localRank == 0) { - for (int v = 0; v < n_vertices; ++v) { - std::cout << "DEBUG shift vector vertex (before): " << v << ": "; - for (int d = 0; d < this->dimension; ++d) { - std::cout << vertex_shift.at(v * this->dimension + d) << " "; - } - std::cout << std::endl; } - } - if (this->localRank == 0 || this->localRank == 1) { - for (int v = 0; v < n_vertices; ++v) { - std::cout << "DEBUG shift vector vertex " << v << ": "; - for (int d = 0; d < this->dimension; ++d) { - std::cout << vertex_shift.at(v * this->dimension + d) << " "; - } - std::cout << std::endl; - } - } + MPI_Waitall(8,sendReq.data(),sendStat.data()); + MPI_Waitall(8,recvReq.data(),recvStat.data()); - bool shift[3]; - for (int z = 0; z < 2; ++z) { - if ((z == 0 && this->local_coords.at(2) == 0) || - (z == 1 && this->local_coords.at(2) == this->global_dims.at(2) - 1)) - shift[2] = false; - else - shift[2] = true; - for (int y = 0; y < 2; ++y) { - if ((y == 0 && this->local_coords.at(1) == 0) || - (y == 1 && this->local_coords.at(1) == this->global_dims.at(1) - 1)) - shift[1] = false; - else - shift[1] = true; - for (int x = 0; x < 2; ++x) { - if ((x == 0 && this->local_coords.at(0) == 0) || - (x == 1 && this->local_coords.at(0) == this->global_dims.at(0) - 1)) - shift[0] = false; - else - shift[0] = true; - for (int d = 0; d < this->dimension; ++d) { - if (d == main_dim) - continue; - int v = 4 * z + 2 * y + x; - if (shift[d]) - this->vertices.at(v)[d] = this->prevVertices.at(v)[d] + - vertex_shift.at(v * this->dimension + d); - else - this->vertices.at(v)[d] = this->prevVertices.at(v)[d]; - } - } + for (int n = 0; n < 8; ++n) + { + Point<T> shiftVec(3, shifts.data()+3*n); + this->vertices.at(n) = this->vertices.at(n) + shiftVec; } - } } }//namespace ALL