merge fixes

05122845 · Rene Halver · e3584bf9 · d781c2c4 · 05122845 · 05122845
Commit 05122845 authored Feb 26, 2019 by Rene Halver
--- a/example/ALL_test.cpp
+++ b/example/ALL_test.cpp
@@ -140,7 +140,9 @@ void read_points(std::vector<ALL_Point<double>>& points,
        MPI_Comm comm)
 {
    MPI_File file;
+    MPI_Barrier(comm);
    int err = MPI_File_open(comm, filename, MPI_MODE_RDONLY, MPI_INFO_NULL, &file);
+    MPI_Barrier(comm);

    n_points = 0;

@@ -404,6 +406,7 @@ int main(int argc, char** argv)
        // setup of vector of points on each MPI rank
        std::vector<double> dummy(sys_dim);
        std::vector<ALL_Point<double>> points;
+        int max_neighbors = 0, loc_neighbors = 0; // zero-init: max_neighbors is logged every loop, even when no neighbor count was reduced

        // setup of cartesian communicator
        int local_rank;
@@ -525,6 +528,7 @@ int main(int argc, char** argv)
        ALL_Point<double> up(u);
        std::vector<ALL_Point<double>> vertices(nvertices,lp);
        std::vector<ALL_Point<double>> new_vertices(nvertices,lp);
+        std::vector<ALL_Point<double>> old_vertices(nvertices,lp);

        switch(chosen_method)
        {
@@ -595,9 +599,11 @@ int main(int argc, char** argv)

        MPI_Allreduce(&n_points,&max_particles,1,MPI_INT,MPI_MAX,cart_comm);

-        //if (local_rank == 0) 
-        //  std::cout << "maximum number of points on any process: " 
-        //            << max_particles << std::endl;
+        max_particles = (int)std::ceil((double)max_particles * 1.5);
+
+        if (local_rank == 0) 
+          std::cout << "maximum number of points on any process: " 
+                    << max_particles << std::endl;
        int max_neig = 27;

        recv = new double[max_neig * (sys_dim+1) * max_particles];
@@ -1004,13 +1010,15 @@ int main(int argc, char** argv)
            MPI_Barrier(cart_comm);
            if (d_ratio < limit_efficiency)
            {
-                gamma /= 2.0;
+                gamma *= 2.0;
                limit_efficiency /= 2.0;
            }
            if (local_rank == 0) std::cout << "loop " << i_loop << ": " << std::endl;
            std::vector<double> work;
+            std::vector<int> n_bins(3,-1);

-            double histogram_width = ALL_HISTOGRAM_DEFAULT_WIDTH / (double)(i_loop + 1);
+            //double histogram_width = ALL_HISTOGRAM_DEFAULT_WIDTH / (double)(i_loop + 1);
+            double histogram_width = ALL_HISTOGRAM_DEFAULT_WIDTH / gamma;

            if (!weighted_points)
            {
@@ -1037,58 +1045,46 @@ int main(int argc, char** argv)
                    work = std::vector<double>(1,(double)n_points);
                else
                {
-                    std::vector<int> n_bins(3,-1);
-                    std::vector<double> lb(3,-1.0);
-                    std::vector<double> ub(3,-1.0);
-                    std::vector<double> overlap(3,0.0);
-                    int offset = 0;
+                    double lb(-1.0);
+                    double ub(-1.0);
+                    double overlap(0.0);
+                    int d = 2 - i_loop % 3;
                    // compute number of bins in each direction
-                    for (int d = 0; d < 3; ++d)
-                    {
-                        lb.at(d) = std::ceil(lp.x(d) / histogram_width) * histogram_width;
-                        ub.at(d) = std::ceil(up.x(d) / histogram_width) * histogram_width;
-                        n_bins.at(d) = (ub.at(3) - lb.at(3)) / histogram_width;
-                        overlap.at(d) = 0.0;
-                    }
-                    work = std::vector<double>(n_bins[0]*
-                            n_bins[1]*
-                            n_bins[2], 0.0);
+                    lb = std::ceil(lp.x(d) / histogram_width) * histogram_width;
+                    ub = std::ceil(up.x(d) / histogram_width) * histogram_width;
+                    n_bins.at(d) = (ub - lb) / histogram_width;
+
+                    work = std::vector<double>(n_bins.at(d), 0.0);
                    // compute histogram of work load
-                    for (int d = 0; d < 3; ++d)
-                    {
                    for (auto p : points)
                    {
-                            int idx = offset + (int)( ( p.x(d) - lb.at(d) / histogram_width ) );
+                        int idx = (int)( ( ( p.x(d) - lb ) / histogram_width ) );
                        if (idx >= 0)
+                        {
                            work.at(idx) += 1.0;
-                            else
-                                overlap.at(d) += 1.0;
                        }
-                        offset += n_bins[d];
+                        else
+                            overlap += 1.0;
                    }

                    // exchange overlapping workload (histograms might overlap
                    // over the domain boundaries
-
-                    offset = 0;
-                    for (int d = 0; d < 3; ++d)
-                    {
                    int rank_left, rank_right; 
                    MPI_Cart_shift(cart_comm,0,1,&rank_left,&rank_right);

                    MPI_Request sreq, rreq; 
                    MPI_Status ssta, rsta;

-                        std::vector<double> recv_work(3);
+                    double recv_work;

-                        MPI_Isend(&overlap.at(d),
+                    MPI_Isend(&overlap,
                            1,
                            MPI_DOUBLE,
                            rank_left,
                            0,
                            cart_comm,
                            &sreq);
-                        MPI_Irecv(&recv_work.at(d),
+                    MPI_Irecv(&recv_work,
                            1,
                            MPI_DOUBLE,
                            rank_right,
@@ -1099,10 +1095,7 @@ int main(int argc, char** argv)
                    MPI_Wait(&rreq,&rsta);

                    if (local_coords[d] != global_dim[d] - 1)
-                            work.at(offset + n_bins[d] - 1) += recv_work.at(d);
-
-                        offset += n_bins[d];
-                    }
+                        work.at(n_bins.at(d) - 1) += recv_work;
                }
            }
            else
@@ -1116,60 +1109,46 @@ int main(int argc, char** argv)
                }
                else
                {
-                    std::vector<int> n_bins(3,-1);
-                    std::vector<double> lb(3,-1.0);
-                    std::vector<double> ub(3,-1.0);
-                    std::vector<double> overlap(3,0.0);
-                    int offset = 0;
+                    double lb(-1.0);
+                    double ub(-1.0);
+                    double overlap(0.0);
+                    int d = 2 - i_loop % 3;
                    // compute number of bins in each direction
-                    for (int d = 0; d < 3; ++d)
-                    {
-                        lb.at(d) = std::ceil(lp.x(d) / histogram_width) * histogram_width;
-                        ub.at(d) = std::ceil(up.x(d) / histogram_width) * histogram_width;
-                        n_bins.at(d) = (ub.at(d) - lb.at(d)) / histogram_width;
-                        overlap.at(d) = 0.0;
-                    }
-                    work = std::vector<double>(n_bins[0]*
-                            n_bins[1]*
-                            n_bins[2], 0.0);
+                    lb = std::ceil(lp.x(d) / histogram_width) * histogram_width;
+                    ub = std::ceil(up.x(d) / histogram_width) * histogram_width;
+                    n_bins.at(d) = (ub - lb) / histogram_width;
+
+                    work = std::vector<double>(n_bins.at(d), 0.0);
                    // compute histogram of work load
-                    for (int d = 0; d < 3; ++d)
-                    {
                    for (auto p : points)
                    {
-                            int idx = offset + (int)( ( p.x(d) - lb.at(d) / histogram_width ) );
+                        int idx = (int)( ( ( p.x(d) - lb ) / histogram_width ) );
                        if (idx >= 0)
                        {
                            work.at(idx) += p.get_weight();
                        }
                        else
-                                overlap.at(d) += p.get_weight();
-                        }
-                        offset += n_bins[d];
+                            overlap += p.get_weight();
                    }

                    // exchange overlapping workload (histograms might overlap
                    // over the domain boundaries
-
-                    offset = 0;
-                    for (int d = 0; d < 3; ++d)
-                    {
                    int rank_left, rank_right; 
                    MPI_Cart_shift(cart_comm,0,1,&rank_left,&rank_right);

                    MPI_Request sreq, rreq; 
                    MPI_Status ssta, rsta;

-                        std::vector<double> recv_work(3);
+                    double recv_work;

-                        MPI_Isend(&overlap.at(d),
+                    MPI_Isend(&overlap,
                            1,
                            MPI_DOUBLE,
                            rank_left,
                            0,
                            cart_comm,
                            &sreq);
-                        MPI_Irecv(&recv_work.at(d),
+                    MPI_Irecv(&recv_work,
                            1,
                            MPI_DOUBLE,
                            rank_right,
@@ -1180,9 +1159,7 @@ int main(int argc, char** argv)
                    MPI_Wait(&rreq,&rsta);

                    if (local_coords[d] != global_dim[d] - 1)
-                            work.at(offset + n_bins[d] - 1) += recv_work.at(d);
-
-                        offset += n_bins[d];
+                        work.at(n_bins.at(d) - 1) += recv_work;
                }
 #ifdef ALL_VORONOI
                if (chosen_method == ALL_LB_t::VORONOI)
@@ -1204,12 +1181,17 @@ int main(int argc, char** argv)
                }
 #endif          
            }
-            }
+
+            MPI_Barrier(cart_comm);
+            if (local_rank == 0)
+                std::cout << "finished computation of work" << std::endl;
            //lb_obj.set_work((double)n_points);

            lb_obj.set_work(work);
            lb_obj.set_communicator(cart_comm);
            lb_obj.setup(chosen_method);
+            if (chosen_method == ALL_LB_t::HISTOGRAM)
+                lb_obj.set_method_data(chosen_method,n_bins.data());


            lb_obj.set_sys_size(chosen_method, sys_size);
@@ -1223,7 +1205,18 @@ int main(int argc, char** argv)
            lb_obj.balance(chosen_method);

            new_vertices = lb_obj.get_result_vertices();
+            old_vertices = vertices;
            vertices = new_vertices;
+
+            std::cout << "vertices " << i_loop << " " << local_rank <<
+                " " << vertices.at(0).x(0) <<
+                " " << vertices.at(0).x(1) <<
+                " " << vertices.at(0).x(2) <<
+                "| " << vertices.at(1).x(0) <<
+                " " << vertices.at(1).x(1) <<
+                " " << vertices.at(1).x(2) <<
+            std::endl;
+
            if (chosen_method == ALL_LB_t::VORONOI)
            {
                vertices.resize(2);
@@ -1385,10 +1378,15 @@ int main(int argc, char** argv)
                        lb_obj.get_neighbors(chosen_method,neighbors);
                        lb_obj.get_neighbors(chosen_method,&n_neighbors);

-
                        offset_neig[0] = 0;
                        offset_neig[1] = n_neighbors[0];

+                        loc_neighbors = 0;
+                        for (int n = 0; n < 6; ++n)
+                            loc_neighbors += n_neighbors[n];
+
+                        MPI_Allreduce(&loc_neighbors,&max_neighbors,1,MPI_INT,MPI_MAX,cart_comm);
+

                        for (int i = 0; i < sys_dim; ++i)
                        {
@@ -1415,6 +1413,7 @@ int main(int argc, char** argv)
                            {
                                n_transfer[j] = 0;
                            }
+
                            for (auto p = points.begin(); p != points.end(); ++p)
                            {
                                if (p->x(i) < vertices.at(0).x(i))
@@ -1430,12 +1429,15 @@ int main(int argc, char** argv)
                                    n_transfer[0]++;
                                    if (n_transfer[0] > max_particles)
                                    {
+                                        std::stringstream ss;
+                                        ss << "Trying to send more particles than buffer size allows! "
+                                           << " n_transfer: " << n_transfer[0] 
+                                           << " max_particles: " << max_particles;
                                        throw ALL_Invalid_Argument_Exception(
                                                __FILE__,
                                                __func__,
                                                __LINE__,
-                                                "Trying to send more particles than buffer \
-                                                size allows!"
+                                                ss.str().c_str()
                                                );
                                    }
                                }
@@ -1458,12 +1460,15 @@ int main(int argc, char** argv)
                                    n_transfer[1]++;
                                    if (n_transfer[1] > max_particles)
                                    {
+                                        std::stringstream ss; // build a diagnostic carrying the offending counts
+                                        ss << "Trying to send more particles than buffer size allows! "
+                                           << " n_transfer: " << n_transfer[1] // the counter checked by this guard
+                                           << " max_particles: " << max_particles;
                                        throw ALL_Invalid_Argument_Exception(
                                                __FILE__,
                                                __func__,
                                                __LINE__,
-                                                "Trying to send more particles than buffer \
-                                                size allows!"
+                                                ss.str().c_str()
                                                );
                                    }
                                }
@@ -1533,7 +1538,7 @@ int main(int argc, char** argv)

                            if (n_neighbors[2*i] > MAX_NEIG)
                            {
-                                throw ALL_Invalid_Argument_Exception(
+                                throw ALL_Internal_Error_Exception(
                                        __FILE__,
                                        __func__,
                                        __LINE__,
@@ -2187,6 +2192,16 @@ int main(int argc, char** argv)
 #endif
                    }
                    break;
+                case ALL_LB_t::HISTOGRAM:
+                {
+                    int curr_dim = 2 - (i_loop % 3);
+                    // determine current dimension
+                    // compare old domains with new domains
+
+                    // transfer points from old domains to new domains
+                    std::cout << "No Comm Yet" << std::endl;
+                    break;
+                }
                default:
                    break;
            }
@@ -2278,7 +2293,11 @@ int main(int argc, char** argv)
                    of.open("minmax.dat", std::ios::out | std::ios::app);
                else
                    of.open("minmax_w.dat", std::ios::out | std::ios::app);
-                of << i_loop+1 << " " << d_min << " " << d_max << " " << d_ratio << std::endl;
+                of << i_loop+1 << " " 
+                    << d_min << " " 
+                    << d_max << " " 
+                    << d_ratio << " " 
+                    << max_neighbors << std::endl;
                of.close();
                if (i_loop % OUTPUT_INTV == 0)
                {

--- a/include/ALL_Staggered.hpp
+++ b/include/ALL_Staggered.hpp
@@ -143,7 +143,12 @@ template <class T, class W> ALL_Staggered_LB<T,W>::~ALL_Staggered_LB()
    if (global_dims) delete global_dims;
    if (local_coords) delete local_coords;
    if (periodicity) delete periodicity;
-    if (communicators) delete communicators;
+    if (communicators)
+    {
+        for (int i = 1; i < 3; ++i)
+            MPI_Comm_free(communicators + i);
+        delete communicators;
+    }
    if (n_neighbors) delete n_neighbors;
 }