diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml
index 4b368ae4041497fe92917f96b7e16d8c057285c3..0fb81608d55c8e2eab791bc36f3d8ebafb7a6e60 100644
--- a/.gitlab-ci.yml
+++ b/.gitlab-ci.yml
@@ -1,15 +1,13 @@
-image: centos-test
-
 stages:
     - init
     - build
 
 before_script:
-    - hostname
-    - whoami
-    - pwd
-    - ls
-    - ls -l ci
+#    - hostname
+#    - whoami
+#    - pwd
+#    - ls
+#    - ls -l ci
     - mkdir -p badges/
 
 badges:
@@ -44,7 +42,7 @@ build-mpi-nofortran:
 
 build-mpi-fortran:
     stage: build
-    tags:
+    tags:
         - linux
     when: always
     script:
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 68a9d62a2318075197faaf92c9a1a029bb22a741..8c82569f975f4673c34937e7397ed45607ca098f 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -3,6 +3,7 @@ cmake_minimum_required (VERSION 3.1)
 option(CM_ALL_VTK_OUTPUT "VTK output routine" OFF)
 option(CM_ALL_VORONOI "Voronoi-based loadbalancing scheme (requires Voro++)" OFF)
 option(CM_ALL_FORTRAN "VTK output routine" OFF)
+option(CM_ALL_DEBUG "Enable debug information" OFF)
 
 # leading and trailing whitespace should be silently ignored
 # thanks to an old FindMPI script
@@ -29,6 +30,11 @@ if(CM_ALL_VTK_OUTPUT)
     add_compile_options("-DALL_VTK_OUTPUT")
 endif(CM_ALL_VTK_OUTPUT)
 
+if (CM_ALL_DEBUG)
+    message("Using ALL debug information")
+    add_compile_options("-DALL_DEBUG_ENABLED")
+endif(CM_ALL_DEBUG)
+
 if(CM_ALL_VORONOI)
     message(STATUS "compiling voro++ version in contrib/voro++")
     add_subdirectory(contrib/voro++)
diff --git a/example/ALL_test.cpp b/example/ALL_test.cpp
index 6c5923fe7da098a007d7df4f970e2d62469341e5..dfd9876cc54e17d5efd9a9adb3ee5cb0a7c934e4 100644
--- a/example/ALL_test.cpp
+++ b/example/ALL_test.cpp
@@ -62,6 +62,8 @@
 #define MAX_NEIG 1024
 #define ALL_HISTOGRAM_DEFAULT_WIDTH 1.0
 
+#undef ALL_VTK_OUTPUT
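+// note: this force-disables the VTK output paths in this example,
+// even if the library was configured with CM_ALL_VTK_OUTPUT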
+
 #ifdef ALL_VORONOI
 #define ALL_VORONOI_PREP_STEPS 50
 #endif
@@ -125,9 +127,9 @@ void generate_points(std::vector<ALL_Point<double>>& points,
  *
  * format of MPI I/O files:
  *
- *   n_proc int         : offset in point data
- *   n_proc int         : number of points on domain
- *   5*n_point double   : point data
+ *   n_proc long                    : offset in point data (in particles)
+ *   n_proc int                     : number of points on domain
+ *   n_point * (long + 4 double)    : point data (ID, x, y, z, weight)
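+ *   (a point record starts at byte n_proc*(sizeof(long)+sizeof(int))
+ *    + (offset + i)*(sizeof(long) + 4*sizeof(double)) for point i of a domain)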
  ****************************************************************/
 
 void read_points(std::vector<ALL_Point<double>>& points,
@@ -146,9 +148,9 @@ void read_points(std::vector<ALL_Point<double>>& points,
 
     n_points = 0;
 
-    int n_procs;
-    int offset;
-    MPI_Comm_size(comm,&n_procs);
+    int n_ranks;
+    long offset;
+    MPI_Comm_size(comm,&n_ranks);
 
     if (err)
     {
@@ -158,30 +160,40 @@ void read_points(std::vector<ALL_Point<double>>& points,
 
     // read offset from file
     MPI_File_read_at(file,
-            (MPI_Offset)(rank*sizeof(int)),
+            (MPI_Offset)(rank*sizeof(long)),
             &offset,
             1,
-            MPI_INT,
+            MPI_LONG,
             MPI_STATUS_IGNORE);
     // read number of points from file
     MPI_File_read_at(file,
-            (MPI_Offset)((n_procs + rank)*sizeof(int)),
+            (MPI_Offset)(n_ranks * sizeof(long) + rank * sizeof(int)),
             &n_points,
             1,
             MPI_INT,
             MPI_STATUS_IGNORE);
 
-    double values[5];
+    double values[4];
+    long ID;
+    long block_size = sizeof(long) + 4*sizeof(double);
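+    // each point record in the file: a long ID followed by 4 doubles (x, y, z, weight)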
     for (int i = 0; i < n_points; ++i)
     {
         MPI_File_read_at(file,
-                (MPI_Offset)(5*(offset + i)*sizeof(double) 
-                    + 2 * n_procs * sizeof(int)),
+                (MPI_Offset)((offset + i)*block_size 
+                    + n_ranks * ( sizeof(int) + sizeof(long))),
+                &ID,
+                1,
+                MPI_LONG,
+                MPI_STATUS_IGNORE);
+        MPI_File_read_at(file,
+                (MPI_Offset)((offset + i)*block_size 
+                    + n_ranks *( sizeof(int)+sizeof(long)) 
+                    + sizeof(long)),
                 values,
-                5,
+                4,
                 MPI_DOUBLE,
                 MPI_STATUS_IGNORE);
-        ALL_Point<double> p(dimension,&values[1],values[4]);
+        ALL_Point<double> p(dimension,&values[0],values[3],ID);
         points.push_back(p);
     }
 
@@ -316,6 +328,8 @@ int main(int argc, char** argv)
         sys_size.at(4) = 0;
         sys_size.at(5) = BOX_SIZE;
 
+        std::vector<double> min_size(3,2.0);
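+        // minimal allowed extension of a domain in each dimension,
+        // passed to the balancer below via set_min_domain_size()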
+
         ALL_LB_t chosen_method = ALL_LB_t::STAGGERED;
         bool weighted_points = false;
         char* filename = NULL;
@@ -1005,7 +1019,7 @@ int main(int argc, char** argv)
 #endif        
         double limit_efficiency = 0.5;
         // create ALL object
-        ALL<double,double> lb_obj(sys_dim,vertices,gamma);
+        ALL<double,double> lb_obj(chosen_method,sys_dim,vertices,gamma);
         for (int i_loop = 0; i_loop < max_loop; ++i_loop)
         {
             MPI_Barrier(cart_comm);
@@ -1182,42 +1196,91 @@ int main(int argc, char** argv)
                 }
 #endif          
             }
-
+#ifdef ALL_DEBUG_ENABLED
             MPI_Barrier(cart_comm);
             if (local_rank == 0)
                 std::cout << "finished computation of work" << std::endl;
             //lb_obj.set_work((double)n_points);
 
+            if( local_rank == 0 ) std::cout << "Setting work..." << std::endl;
+#endif
             lb_obj.set_work(work);
+#ifdef ALL_DEBUG_ENABLED
+            MPI_Barrier(cart_comm);
+            if( local_rank == 0 ) std::cout << "Setting process grid information..." << std::endl;
+#endif
             lb_obj.set_proc_grid_params(local_coords,global_dim);
+#ifdef ALL_DEBUG_ENABLED
+            MPI_Barrier(cart_comm);
+            if( local_rank == 0 ) std::cout << "Setting communicator..." << std::endl;
+#endif
             lb_obj.set_communicator(MPI_COMM_WORLD);
-            lb_obj.setup(chosen_method);
+#ifdef ALL_DEBUG_ENABLED
+            MPI_Barrier(cart_comm);
+            if( local_rank == 0 ) std::cout << "Setting up method..." << std::endl;
+#endif
+            lb_obj.setup();
+#ifdef ALL_DEBUG_ENABLED
+            MPI_Barrier(cart_comm);
+            if( local_rank == 0 ) std::cout << "Setting method data..." << std::endl;
+#endif
             if (chosen_method == ALL_LB_t::HISTOGRAM)
-                lb_obj.set_method_data(chosen_method,n_bins.data());
+                lb_obj.set_method_data(n_bins.data());
 
-
-            lb_obj.set_sys_size(chosen_method, sys_size);
+#ifdef ALL_DEBUG_ENABLED
+            MPI_Barrier(cart_comm);
+            if( local_rank == 0 ) std::cout << "Setting system size..." << std::endl;
+#endif
+            lb_obj.set_sys_size(sys_size);
+#ifdef ALL_DEBUG_ENABLED
+            MPI_Barrier(cart_comm);
+            if( local_rank == 0 ) std::cout << "Setting domain vertices..." << std::endl;
+#endif
             lb_obj.set_vertices(vertices);
+#ifdef ALL_DEBUG_ENABLED
+            MPI_Barrier(cart_comm);
+            if( local_rank == 0 ) std::cout << "Setting domain minimum size..." << std::endl;
+#endif
+            lb_obj.set_min_domain_size(min_size);
             // print out number of particles to check communication!
             int n_points_global = 0;
             MPI_Reduce(&n_points,&n_points_global,1,MPI_INT,MPI_SUM,0,MPI_COMM_WORLD);
-            if (local_rank == 0) 
-                std::cout << "number of particles in step " 
-                    << i_loop << ": " << n_points_global << std::endl;
-            lb_obj.balance(chosen_method);
+            if( local_rank == 0 ) std::cout << "number of particles in step " 
+                << i_loop << ": " << n_points_global << std::endl;
+#ifdef ALL_DEBUG_ENABLED
+            MPI_Barrier(cart_comm);
+            if( local_rank == 0 ) std::cout << "Balancing domains..." << std::endl;
+#endif
+            lb_obj.balance();
 
+#ifdef ALL_DEBUG_ENABLED
+            MPI_Barrier(cart_comm);
+            if( local_rank == 0 ) std::cout << "Updating vertices..." << std::endl;
+#endif
             new_vertices = lb_obj.get_result_vertices();
             old_vertices = vertices;
             vertices = new_vertices;
 
             if (chosen_method == ALL_LB_t::VORONOI)
             {
+#ifdef ALL_DEBUG_ENABLED
+                MPI_Barrier(cart_comm);
+                if( local_rank == 0 ) std::cout << "Updating vertices (Voronoi)..." << std::endl;
+#endif
                 vertices.resize(2);
                 vertices.at(1) = vertices.at(0);
             }
+#ifdef ALL_DEBUG_ENABLED
+            MPI_Barrier(cart_comm);
+            if( local_rank == 0 ) std::cout << "Updating upper / lower vertices..." << std::endl;
+#endif
             lp = vertices.at(0);
             up = vertices.at(vertices.size()-1);
 
+#ifdef ALL_DEBUG_ENABLED
+            MPI_Barrier(cart_comm);
+            if( local_rank == 0 ) std::cout << "beginning particles transfer..." << std::endl;
+#endif
             switch(chosen_method)
             {
                 case ALL_LB_t::TENSOR:
@@ -1368,8 +1431,8 @@ int main(int argc, char** argv)
                         int offset_neig[2];
                         std::vector<int> neighbors;
                         int* n_neighbors;
-                        lb_obj.get_neighbors(chosen_method,neighbors);
-                        lb_obj.get_neighbors(chosen_method,&n_neighbors);
+                        lb_obj.get_neighbors(neighbors);
+                        lb_obj.get_neighbors(&n_neighbors);
 
                         offset_neig[0] = 0;
                         offset_neig[1] = n_neighbors[0];
@@ -1982,11 +2045,11 @@ int main(int argc, char** argv)
                     {
 #ifdef ALL_VORONOI                    
                         // get neighbor information
-                        int n_neighbors = lb_obj.get_n_neighbors(chosen_method);
+                        int n_neighbors = lb_obj.get_n_neighbors();
                         std::vector<double> neighbor_vertices;
                         std::vector<int> neighbors;
-                        lb_obj.get_neighbor_vertices(chosen_method, neighbor_vertices);
-                        lb_obj.get_neighbors(chosen_method, neighbors);
+                        lb_obj.get_neighbor_vertices(neighbor_vertices);
+                        lb_obj.get_neighbors(neighbors);
 
                         // compute voronoi cells
 
@@ -2277,7 +2340,7 @@ int main(int argc, char** argv)
                             */
                             if (
                                     (borders.at(2*n) <= old_border.at(0) &&
-                                     borders.at(2*n+1) > old_border.at(0)) ||
+                                     borders.at(2*n+1) >= old_border.at(0)) ||
                                     (borders.at(2*n) <= old_border.at(1) &&
                                      borders.at(2*n+1) >= old_border.at(1)) ||
                                     (borders.at(2*n) > old_border.at(0) &&
@@ -2286,7 +2349,7 @@ int main(int argc, char** argv)
                                 send_neig.push_back(n);
                             if (
                                     (old_borders.at(2*n) <= local_borders.at(0) &&
-                                     old_borders.at(2*n+1) > local_borders.at(0)) ||
+                                     old_borders.at(2*n+1) >= local_borders.at(0)) ||
                                     (old_borders.at(2*n) <= local_borders.at(1) &&
                                      old_borders.at(2*n+1) >= local_borders.at(1)) ||
                                     (old_borders.at(2*n) > local_borders.at(0) &&
@@ -2321,6 +2384,7 @@ int main(int argc, char** argv)
                                     send_vec.at(n).push_back(p.x(1));
                                     send_vec.at(n).push_back(p.x(2));
                                     send_vec.at(n).push_back(p.get_weight());
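+                                    // the long ID is packed into a double for the MPI transfer;
+                                    // this is exact only for IDs below 2^53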
+                                    send_vec.at(n).push_back((double)(p.get_id()));
                                 }
                             }
                         }
@@ -2388,6 +2452,7 @@ int main(int argc, char** argv)
                             recvs++;
                         }
 
+
                         // transfer points from old domains to new domains
                         for (int n = 0; n < send_neig.size(); ++n)
                         {
@@ -2416,11 +2481,12 @@ int main(int argc, char** argv)
                         {
                             int idx;
                             MPI_Waitany(recv_neig.size(),rreq.data(),&idx,rsta.data());
-                            for (int p = 0; p < recv_vec.at(idx).size(); p+=4)
+                            for (int p = 0; p < recv_vec.at(idx).size(); p+=5)
                             {
                                 ALL_Point<double> tmp(3,
                                         recv_vec.at(idx).data()+p,
-                                        recv_vec.at(idx).at(p+3)
+                                        recv_vec.at(idx).at(p+3),
+                                        (long)(recv_vec.at(idx).at(p+4))
                                         );
                                 points.push_back(tmp);
                             }
@@ -2435,6 +2501,7 @@ int main(int argc, char** argv)
                     break;
             }
 
+
             if (i_loop <= 100 || i_loop % OUTPUT_INTV == 0)
             {
 #ifdef ALL_VORONOI
@@ -2467,6 +2534,7 @@ int main(int argc, char** argv)
 #endif
             }
 
+
             // calculate quality parameters
             if (!weighted_points)
             {
@@ -2483,8 +2551,6 @@ int main(int argc, char** argv)
             double n_total_points;
             MPI_Allreduce(&n_local,&n_total_points,1,MPI_DOUBLE,MPI_SUM,cart_comm);
 
-
-
             MPI_Allreduce(&n_local,&n_total,1,MPI_DOUBLE,MPI_SUM,cart_comm);
             avg_work = n_total/(double)n_ranks;
             MPI_Allreduce(&n_local,&n_min,1,MPI_DOUBLE,MPI_MIN,cart_comm);
@@ -2549,14 +2615,27 @@ int main(int argc, char** argv)
                     else
                         of.open("domain_data_w.dat", std::ios::out | std::ios::app);
                     of << (i_loop+1) << " " << local_rank << " ";
-
+
+                    if (chosen_method == ALL_LB_t::HISTOGRAM)
+                    {
+                        int* n_neighbors;
+                        lb_obj.get_neighbors(&n_neighbors);
+                        int n_neig = 0;
+                        for (int j = 0; j < 3; ++j)
+                        {
+                            n_neig += n_neighbors[j];
+                        }
+                        of << n_neig << " ";
+                    }
+                    /*
                     for (int j = 0; j < sys_dim; ++j)
                     {
                         of << " " << local_coords[j] 
                             << " " << lp.x(j) << " " << up.x(j) << " ";
                     }
+                    */
 
-                    of << " " << n_local << " ";
+                    of << n_local << " ";
 
                     of << std::endl;
                     if (i == n_ranks - 1) of << std::endl;
@@ -2642,6 +2721,78 @@ int main(int argc, char** argv)
 
         }
 
+        // create binary output (e.g. for HemeLB)
+        // format:
+        // n_ranks integers (number of particles per domain)
+        // n_ranks long (byte offset of each domain's particle data)
+        // per particle: a long ID (writing the 3*double positions is currently commented out)
+
+
+        // open file
+        int err;        
+        MPI_File outfile;
+        err = MPI_File_open(MPI_COMM_WORLD, 
+                            "particles_output.bin", 
+                            MPI_MODE_CREATE | MPI_MODE_RDWR, 
+                            MPI_INFO_NULL, 
+                            &outfile);
+
+        // write out the number of particles on each process
+        MPI_File_write_at(outfile,
+                              (MPI_Offset)(local_rank*sizeof(int)),
+                              &n_points,
+                              1,
+                              MPI_INT,
+                              MPI_STATUS_IGNORE);
+
+        // compute the offset for each process
+        long n_p = (long)n_points;
+        long offset;
+
+        MPI_Scan(&n_p,&offset,1,MPI_LONG,MPI_SUM,cart_comm);
+        offset -= n_p;
+        // size of data for a single block
+        long block_size = sizeof(long); // + 3 * sizeof(double);
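+        // a per-particle record currently holds only the long ID; enabling the
+        // commented-out write below would append the 3 double positions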
+
+        offset *= block_size;
+        offset += n_ranks * (sizeof(int) + sizeof(long));
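+        // offset is now the byte position of this rank's first record:
+        // the header (n_ranks ints + n_ranks longs) plus the records of all lower ranks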
+
+
+        MPI_File_write_at(outfile,
+                              (MPI_Offset)(     n_ranks * sizeof(int) 
+                                           + local_rank * sizeof(long)),
+                              &offset,
+                              1,
+                              MPI_LONG,
+                              MPI_STATUS_IGNORE);
+
+
+        for ( int n = 0; n < n_points; ++n)
+        {
+            long id = points.at(n).get_id();
+            double loc[3];
+            loc[0] = points.at(n).get_coordinate(0);
+            loc[1] = points.at(n).get_coordinate(1);
+            loc[2] = points.at(n).get_coordinate(2);
+            MPI_File_write_at(outfile,
+                              (MPI_Offset)((offset+n*block_size)),
+                              &id,
+                              1,
+                              MPI_LONG,
+                              MPI_STATUS_IGNORE);
+            /*
+            MPI_File_write_at(outfile,
+                              (MPI_Offset)((offset+n*block_size)+sizeof(long)),
+                              loc,
+                              3,
+                              MPI_DOUBLE,
+                              MPI_STATUS_IGNORE);
+            */
+        }
+        
+        MPI_File_close(&outfile);
+
+
         /*
         // output of borders / contents
         if (n_ranks < 216)
@@ -2686,6 +2837,7 @@ int main(int argc, char** argv)
             std::cout << "min_step: " << min_step << std::endl;
             std::cout << std::endl;
         }
+        MPI_Barrier(cart_comm);
         MPI_Finalize();
         return EXIT_SUCCESS;
     }
diff --git a/example/CMakeLists.txt b/example/CMakeLists.txt
index a6acb87ce9d7f70cf060a1fc65e6967daeea70e8..2b52833468b60538c9348e61b952367b85236117 100644
--- a/example/CMakeLists.txt
+++ b/example/CMakeLists.txt
@@ -26,6 +26,15 @@ install(TARGETS ASCII2MPIBIN
         LIBRARY DESTINATION lib
         INCLUDES DESTINATION include)
 
+add_executable(read_binary_output read_output.cpp)
+
+target_link_libraries(read_binary_output LINK_PUBLIC ALL)
+
+install(TARGETS read_binary_output
+        RUNTIME DESTINATION bin
+        LIBRARY DESTINATION lib
+        INCLUDES DESTINATION include)
+
 if(CM_ALL_FORTRAN)
     add_executable(ALL_test_f ALL_test_f.f90)
     if(CM_ALL_VTK_OUTPUT)
diff --git a/example/ascii2mpibin.cpp b/example/ascii2mpibin.cpp
index c41ed452da8e52467fbe3284464aa894ee13d324..bd65465f45d97b71d1aa2eef2e9ff1fadb5af9ef 100644
--- a/example/ascii2mpibin.cpp
+++ b/example/ascii2mpibin.cpp
@@ -35,7 +35,8 @@ int main (int argc, char** argv)
 
     char line[256];
 
-    double values[5];
+    long blockID;
+    double values[4];
     int counter = 0;
 
     double max[3];
@@ -47,9 +48,9 @@ int main (int argc, char** argv)
         std::string str(line);
         std::istringstream istr(str);
         //std::cout << str << std::endl;
-        istr >> values[0] >> values[1] >> values[2] >> values[3] >> values[4];
+        istr >> blockID >> values[0] >> values[1] >> values[2] >> values[3];
         for (int i = 0; i < 3; ++i)
-            if (max[i] < values[i+1]) max[i] = values[i+1];
+            if (max[i] < values[i]) max[i] = values[i];
     }        
 
     // go back to start of file
@@ -72,8 +73,8 @@ int main (int argc, char** argv)
     int offset = 0;
     const int n_procs = dim[0] * dim[1] * dim[2];
 
-    std::vector<int> loc_parts(dim[0]*dim[1]*dim[2]);
-    std::vector<int> offset_domain(dim[0]*dim[1]*dim[2]);
+    std::vector<long> loc_parts(dim[0]*dim[1]*dim[2]);
+    std::vector<long> offset_domain(dim[0]*dim[1]*dim[2]);
     std::vector<int> curr_index(dim[0]*dim[1]*dim[2]);
 
     for (int i = 0; i < dim[0] * dim[1] * dim[2]; ++i)
@@ -89,12 +90,12 @@ int main (int argc, char** argv)
         if(infile.eof()) break;
         std::string str(line);
         std::istringstream istr(str);
-        istr >> values[0] >> values[1] >> values[2] >> values[3] >> values[4];
+        istr >> blockID >> values[0] >> values[1] >> values[2] >> values[3];
 
         int cell[3];
 
         for (int i = 0; i < 3; ++i)
-            cell[i] = (int)(values[i+1] / cell_size[i]);
+            cell[i] = (int)(values[i] / cell_size[i]);
 
         loc_parts.at(cell[0]*dim[1]*dim[2]+cell[1]*dim[2]+cell[2])++;
     }
@@ -105,8 +106,9 @@ int main (int argc, char** argv)
     for (int i = 0; i < dim[0] * dim[1] * dim[2]; ++i)
     {
         offset_domain.at(i) -= loc_parts.at(i);
-        MPI_File_write_at_all(outfile,(MPI_Offset)(i*sizeof(int)),&offset_domain.at(i),1,MPI_INT,MPI_STATUS_IGNORE);
-        MPI_File_write_at_all(outfile,(MPI_Offset)((n_procs + i)*sizeof(int)),&loc_parts.at(i),1,MPI_INT,MPI_STATUS_IGNORE);
+        int n_loc = (int)loc_parts.at(i);
+        MPI_File_write_at_all(outfile,(MPI_Offset)(i*sizeof(long)),&offset_domain.at(i),1,MPI_LONG,MPI_STATUS_IGNORE);
+        MPI_File_write_at_all(outfile,(MPI_Offset)(n_procs *sizeof(long) + i * sizeof(int)),&n_loc,1,MPI_INT,MPI_STATUS_IGNORE);
     }
 
     // go back to start of file
@@ -119,21 +121,33 @@ int main (int argc, char** argv)
         if(infile.eof()) break;
         std::string str(line);
         std::istringstream istr(str);
-        istr >> values[0] >> values[1] >> values[2] >> values[3] >> values[4];
+        istr >> blockID >> values[0] >> values[1] >> values[2] >> values[3];
 
         int cell[3];
 
         for (int i = 0; i < 3; ++i)
-            cell[i] = (int)(values[i+1] / cell_size[i]);
+            cell[i] = (int)(values[i] / cell_size[i]);
 
         int domain = cell[0] * dim[1] * dim[2] + cell[1] * dim[2] + cell[2];
 
+        long block_size = sizeof(long) + 4*sizeof(double);
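+        // one record per particle: the long ID followed by the 4 double values of the ASCII line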
+
+        MPI_File_write_at_all(
+                outfile,
+                (MPI_Offset)
+                   (n_procs*(sizeof(int) + sizeof(long))+(offset_domain.at(domain)+curr_index.at(domain))*block_size),
+                &blockID,
+                1,
+                MPI_LONG,
+                MPI_STATUS_IGNORE
+        );
+
         MPI_File_write_at_all(
                 outfile,
                 (MPI_Offset)
-                   ((2*n_procs)*sizeof(int)+5*(offset_domain.at(domain)+curr_index.at(domain))*sizeof(double)),
+                   (n_procs*(sizeof(int) + sizeof(long))+(offset_domain.at(domain)+curr_index.at(domain))*block_size+sizeof(long)),
                 values,
-                5,
+                4,
                 MPI_DOUBLE,
                 MPI_STATUS_IGNORE
         );
diff --git a/example/read_output.cpp b/example/read_output.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..6c6e75a985338fa909be8eba0d5fca1f91f3a821
--- /dev/null
+++ b/example/read_output.cpp
@@ -0,0 +1,101 @@
+#include <stdlib.h>
+#include <iostream>
+#include <fstream>
+#include <sstream>
+#include <string>
+#include <vector>
+#include <numeric>
+#include <mpi.h>
+
+int main (int argc, char** argv)
+{
+    MPI_Init(&argc, &argv);
+
+    if (argc < 3)
+    {
+        std::cout << "usage: " << argv[0] << " <in_file (ASCII)> <n_p>" << std::endl;
+        MPI_Finalize();
+        exit(-1);
+    }
+
+    int n = atoi(argv[2]);
+
+    MPI_File infile;
+    int err;
+
+    err = MPI_File_open(
+                            MPI_COMM_WORLD,
+                            argv[1],
+                            MPI_MODE_RDONLY,
+                            MPI_INFO_NULL,
+                            &infile
+                       );
+
+    double positions[3];
+    long blockID;
+    long offset;
+    int n_part;
+
+    int blocksize = sizeof(long); // + 3 * sizeof(double);
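+    // each record currently holds only the long particle ID; the commented-out
+    // read below would also fetch the 3 double positions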
+
+    for (int d = 0; d < n; ++d)
+    {
+        MPI_File_read_at(
+                            infile,
+                            (MPI_Offset)(d * sizeof(int)),
+                            &n_part,
+                            1,
+                            MPI_INT,
+                            MPI_STATUS_IGNORE
+                        );
+        MPI_File_read_at(
+                            infile,
+                            (MPI_Offset)(n * sizeof(int) + d * sizeof(long)),
+                            &offset,
+                            1,
+                            MPI_LONG,
+                            MPI_STATUS_IGNORE
+                        );
+        for (int p = 0; p < n_part; ++p)
+        {
+            MPI_File_read_at(
+                                infile,
+                                (MPI_Offset)(
+                                             offset + 
+                                             p * blocksize 
+                                            ),
+                                &blockID,
+                                1,
+                                MPI_LONG,
+                                MPI_STATUS_IGNORE
+                            );
+            /*
+            MPI_File_read_at(
+                                infile,
+                                (MPI_Offset)(
+                                             offset + 
+                                             p * blocksize + 
+                                             sizeof(long)
+                                            ),
+                                positions,
+                                3,
+                                MPI_DOUBLE,
+                                MPI_STATUS_IGNORE
+                            );
+            */
+            std::cout << "Rank: " << d
+                      << " Offset: " << offset
+                      << " N: " << n_part
+                      << " Particle: " << p
+                      << " ID: " << blockID
+                      /*
+                      << " Position: " << positions[0] << " "
+                                       << positions[1] << " "
+                                       << positions[2] << " "
+                      */
+                      << std::endl;
+        }
+    }
+
+    MPI_Finalize();
+}
diff --git a/include/ALL.hpp b/include/ALL.hpp
index 31f9a3fcec514bc1e930abc2374c8661e34b9656..a36bacc0f89865037f63ea03ba88e2202d890e5e 100644
--- a/include/ALL.hpp
+++ b/include/ALL.hpp
@@ -31,6 +31,7 @@ POSSIBILITY OF SUCH DAMAGE.
 #ifndef ALL_MAIN_HEADER_INC
 #define ALL_MAIN_HEADER_INC
 
+#include "ALL_LB.hpp"
 #include "ALL_Point.hpp"
 #include "ALL_CustomExceptions.hpp"
 #include "ALL_Tensor.hpp"
@@ -46,6 +47,7 @@ POSSIBILITY OF SUCH DAMAGE.
 #include <algorithm>
 #include <numeric>
 #include <iomanip>
+#include <memory>
 
 #ifdef ALL_VTK_OUTPUT
 #include <vtkXMLPUnstructuredGridWriter.h>
@@ -84,13 +86,11 @@ template <class T, class W> class ALL
     public:
 
         // constructor (empty)
-        ALL() : work_array(NULL), 
+        ALL() : 
 #ifdef ALL_VTK_OUTPUT
                 vtk_init(false),
 #endif        
-                balancer(NULL),
-                loadbalancing_step(0),
-                dimension(3) 
+                loadbalancing_step(0)
                 {
                     // determine correct MPI data type for template T
                     if (std::is_same<T,double>::value) mpi_data_type_T = MPI_DOUBLE;
@@ -106,23 +106,49 @@ template <class T, class W> class ALL
                 }
 
         // constructor (dimension only)
-        ALL(const int d_, const T g) : ALL()       { 
-                                                        gamma = g;
-                                                        dimension = d_;
-                                                        outline = new std::vector<T>(2*d_); 
-                                                        estimation_limit = ALL_ESTIMATION_LIMIT_DEFAULT;
-                                             }
+        ALL(ALL_LB_t m, const int d, const T g) : ALL()       
+        {
+            method = m;
+            switch(method)
+            {
+                case ALL_LB_t::TENSOR:
+                    balancer.reset(new ALL_Tensor_LB<T,W>(d,(W)0,g));
+                    break;
+                case ALL_LB_t::STAGGERED:
+                    balancer.reset(new ALL_Staggered_LB<T,W>(d,(W)0,g));
+                    break;
+                case ALL_LB_t::UNSTRUCTURED:
+                    balancer.reset(new ALL_Unstructured_LB<T,W>(d,(W)0,g));
+                    break;
+                case ALL_LB_t::VORONOI:
+#ifdef ALL_VORONOI
+                    balancer.reset(new ALL_Voronoi_LB<T,W>(d,(W)0,g));
+#endif                
+                    break;
+                case ALL_LB_t::HISTOGRAM:
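+                    // created with a dummy work vector here; the actual work
+                    // is provided later through set_work()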
+                    balancer.reset(new ALL_Histogram_LB<T,W>(d,std::vector<W>(10),g));
+                    break;
+                default:
+                    throw ALL_Invalid_Argument_Exception(
+                                            __FILE__,
+                                            __func__,
+                                            __LINE__,
+                                            "Unknown type of loadbalancing passed.");
+            }
+            balancer->set_dimension(d);        
+            balancer->set_gamma(g);
+            estimation_limit = ALL_ESTIMATION_LIMIT_DEFAULT;
+        }
 
         // constructor (dimension + vertices)
-        ALL(const int d_, const std::vector<ALL_Point<T>>& inp, const T g) : ALL(d_,g)
+        ALL(ALL_LB_t method, const int d_, std::vector<ALL_Point<T>>& inp, const T g) : ALL(method, d_,g)
         {
-            vertices = inp;
-            estimation_limit = ALL_ESTIMATION_LIMIT_DEFAULT;
+            balancer->set_vertices(inp);
             calculate_outline();
         }
 
         // destructor
-        ~ALL();
+        ~ALL() = default;
 
         // overwrite vertices
         // with a vector
@@ -144,58 +170,52 @@ template <class T, class W> class ALL
         // vector of work (cell based -> work per cell)
         void set_work(const std::vector<W>&);
 
-        // separate method used to create the balancer object (no setup method included)
-        void create_balancer(ALL_LB_t);
-
         // setup balancer
-        void setup(ALL_LB_t);
+        void setup();
 
         // calculate load-balance
-        void balance(ALL_LB_t method, bool internal = false);
+        std::vector<ALL_Point<T>>& balance(bool internal = false);
 
         // separate call ONLY for the setup method of the balancer
-        void setup_balancer(ALL_LB_t method);
-
-        // cleanup balancer
-        void cleanup(ALL_LB_t);
+        void setup_balancer();
 
         // getter functions
 
         // vertices 
-        std::vector<ALL_Point<T>>& get_vertices() {return vertices;};
-        std::vector<ALL_Point<T>>& get_result_vertices() {return result_vertices;};
+        std::vector<ALL_Point<T>>& get_vertices() {return balancer->get_vertices();};
+        std::vector<ALL_Point<T>>& get_result_vertices() {return balancer->get_shifted_vertices();};
 
         // dimension
-        int get_dimension() {return dimension;}
+        int get_dimension() {return balancer->get_dimension();}
 
         // work
         void get_work(std::vector<W>&);
         void get_work(W&);
 
         // neighbors
-        int get_n_neighbors(ALL_LB_t);
+        int get_n_neighbors();
         // provide list of neighbors (ranks)
-        void get_neighbors(ALL_LB_t,std::vector<int>&);
+        void get_neighbors(std::vector<int>&);
         // provide a list of neighbor vertices (VORONOI only)
-        void get_neighbor_vertices(ALL_LB_t,std::vector<T>&);
+        void get_neighbor_vertices(std::vector<T>&);
         // provide list of neighbors in each direction
-        void get_neighbors(ALL_LB_t,int**);
+        void get_neighbors(int**);
 
         // set system size
         // (x_min, x_max, y_min, y_max, z_min, z_max)
-        void set_sys_size(ALL_LB_t,std::vector<T>&);
+        void set_sys_size(std::vector<T>&);
 
         // method to set method specific data, without
         // the necessity to create dummy hooks for other
         // methods
-        void set_method_data(ALL_LB_t,void*);
+        void set_method_data(void*);
 
         // if using a non-cartesian communicator set the processor grid
         // parameters to create an internal cartesian grid
         void set_proc_grid_params(const std::vector<int>&, const std::vector<int>&);
         void set_proc_grid_params(int*, int*);
 
-        void set_min_domain_size(ALL_LB_t,T*);
+        void set_min_domain_size(const std::vector<T>&);
 
 #ifdef ALL_VTK_OUTPUT        
         // print outlines and points within domain
@@ -205,43 +225,34 @@ template <class T, class W> class ALL
 #endif        
 
     private:
-        ALL_LB_t load_balancer_type;
+        ALL_LB_t method;
 
         // outer cuboid encasing the domain (for cuboid domains identical to domain)
         // defined by front left lower [0][*] and back upper right points [1][*]
         // where * is 0,...,dim-1
-        std::vector<T>* outline;
-
-        // gamma = correction factor
-        T gamma;
+        std::vector<ALL_Point<T>> outline;
 
-        // dimension of the system (>=1)
-        int dimension;
+//         // gamma = correction factor
+//         T gamma;
+// 
+//         // dimension of the system (>=1)
+//         int dimension;
 
         // balancer
-        void *balancer;
+        std::unique_ptr<ALL_LB<T,W>> balancer;
 
         // data containing cells (TODO: defintion of data layout, etc.)
-        std::vector<W> *work_array;
+        std::vector<W> work_array;
 
+        // MPI communicator used in library
+        MPI_Comm communicator;
+        
         // number of vertices (for non-cuboid domains)
         int n_vertices;
 
         // number of neighbors
         int n_neighbors;
 
-        // list of vertices
-        std::vector<ALL_Point<T>> vertices;
-
-        // list of vertices (after a balancing shift)
-        std::vector<ALL_Point<T>> result_vertices;
-
-        // list of neighbors
-        std::vector<int> neighbors;
-
-        // global MPI communicator
-        MPI_Comm communicator;
-
         // calculate the outline of the domain using the vertices
         void calculate_outline();
 
@@ -268,117 +279,46 @@ template <class T, class W> class ALL
 #endif        
 };
 
-template <class T, class W> ALL<T,W>::~ALL()
-{
-    delete outline;
-    delete work_array;
-    proc_grid_set = false;
-
-  try
-  {
-    switch(load_balancer_type)
-    {
-    case ALL_LB_t::TENSOR:
-      delete (ALL_Tensor_LB<T,W>*)balancer;
-      break;
-    case ALL_LB_t::STAGGERED:
-      delete (ALL_Staggered_LB<T,W>*)balancer;
-      break;
-    case ALL_LB_t::UNSTRUCTURED:
-      delete (ALL_Unstructured_LB<T,W>*)balancer;
-      break;
-    case ALL_LB_t::VORONOI:
-#ifdef ALL_VORONOI
-      delete (ALL_Voronoi_LB<T,W>*)balancer;
-#endif
-      break;
-    case ALL_LB_t::HISTOGRAM:
-      delete (ALL_Histogram_LB<T,W>*)balancer;
-      break;
-    default:
-      throw ALL_Invalid_Argument_Exception(
-          __FILE__,
-          __func__,
-          __LINE__,
-          "Unknown type of loadbalancing passed.");
-    }
-  }
-  catch (ALL_Custom_Exception e)
-  {
-    std::cout << e.what() << std::endl;
-    MPI_Abort(MPI_COMM_WORLD,-1);
-  }
-}
-
 template <class T, class W> void ALL<T,W>::set_vertices(std::vector<ALL_Point<T>>& inp)
 {
     int dim = inp.at(0).get_dimension();
-    if (dim != dimension) throw ALL_Point_Dimension_Missmatch_Exception(
+    if (dim != balancer->get_dimension()) throw ALL_Point_Dimension_Missmatch_Exception(
                                 __FILE__,
                                 __func__,
                                 __LINE__,
                                 "Dimension of ALL_Points in input vector do not match dimension of ALL object.");
-    // clean old vertices (overwrite)
-    vertices = inp;
+    balancer->set_vertices(inp);
     calculate_outline();
 }
 
 template <class T, class W> void ALL<T,W>::set_vertices(const int n, const int dim, const T* inp)
 {
-    if (dim != dimension) throw ALL_Point_Dimension_Missmatch_Exception(
+    std::vector<ALL_Point<T>> points(n,ALL_Point<T>(dim));
+    if (dim != balancer->get_dimension()) throw ALL_Point_Dimension_Missmatch_Exception(
                                 __FILE__,
                                 __func__,
                                 __LINE__,
                                 "Dimension of ALL_Points in input vector do not match dimension of ALL object.");
-    ALL_Point<double> tmp(dim);
-    vertices = std::vector<ALL_Point<double>>(n,tmp);
     for (int i = 0; i < n; ++i)
     {
         for (int d = 0; d < dim; ++d)
         {
-            vertices.at(i).set_coordinate(d,inp[ i * dim + d ]);
+            points.at(i).set_coordinate(d,inp[ i * dim + d ]);
         }
     }
+    balancer->set_vertices(points);
     calculate_outline();
 }
 
-template <class T, class W> void ALL<T,W>::calculate_outline()
-{
-    // calculation only possible if there are vertices defining the domain
-    if (vertices.size() > 0)
-    {
-        // setup the outline with the first point
-        for (int i = 0; i < dimension; ++i)
-        {
-            outline->at(0*dimension+i) = outline->at(1*dimension+i) = vertices.at(0).x(i);
-        }
-        // compare value of each outline point with all vertices to find the maximum
-        // extend of the domain in each dimension
-        for (int i = 1; i < vertices.size(); ++i)
-        {
-            ALL_Point<T> p = vertices.at(i);
-            for( int j = 0; j < dimension; ++j)
-            {
-                outline->at(0*dimension+j) = std::min(outline->at(0*dimension+j),p.x(j));
-                outline->at(1*dimension+j) = std::max(outline->at(1*dimension+j),p.x(j));
-            }
-        }
-    }
-}
-
 template <class T, class W> void ALL<T,W>::set_work(W work)
 {
-    // clear work_array
-    if (work_array) delete work_array;
     // set new value for work (single value for whole domain)
-    work_array = new std::vector<W>(1);
-    work_array->at(0) = work;
+    balancer->set_work(work);
 }
 
 template <class T, class W> void ALL<T,W>::set_work(const std::vector<W>& work)
 {
-    if (work_array) delete work_array;
-    work_array = new std::vector<W>(work);
+    balancer->set_work(work);
 }
 
 template <class T, class W> void ALL<T,W>::set_proc_grid_params(const std::vector<int>& loc, const std::vector<int>& size)
@@ -390,9 +330,9 @@ template <class T, class W> void ALL<T,W>::set_proc_grid_params(const std::vecto
 
 template <class T, class W> void ALL<T,W>::set_proc_grid_params(int* loc, int* size)
 {
-    proc_grid_loc.resize(dimension);
-    proc_grid_dim.resize(dimension);
-    for (int i = 0; i < dimension; ++i)
+    proc_grid_loc.resize(balancer->get_dimension());
+    proc_grid_dim.resize(balancer->get_dimension());
+    for (int i = 0; i < balancer->get_dimension(); ++i)
     {
         proc_grid_loc.at(i) = loc[i];
         proc_grid_dim.at(i) = size[i];
@@ -406,17 +346,17 @@ template <class T, class W> void ALL<T,W>::set_communicator(MPI_Comm comm)
     if (MPI_Topo_test(comm, &comm_type) == MPI_CART)
     {
         communicator = comm;
+        balancer->set_communicator(communicator);
     }
     else
     {
         int rank;
         MPI_Comm_rank(comm, &rank);
-        if (rank == 0) std::cout << "creating internal cartesian communicator from process grid data" << std::endl;
         if (proc_grid_set)
         {
             // compute 1D index from the location in the process grid (using z-y-x ordering, as MPI_Dims_create)
             int idx;
-            switch (dimension)
+            switch (balancer->get_dimension())
             {
                 case 3:
                         idx = proc_grid_loc.at(2) +
@@ -446,7 +386,8 @@ template <class T, class W> void ALL<T,W>::set_communicator(MPI_Comm comm)
             std::vector<int> periods(3,1);
 
             // transform temporary communicator to cartesian communicator
-            MPI_Cart_create(temp_comm, dimension, proc_grid_dim.data(), periods.data(), 0, &communicator);
+            MPI_Cart_create(temp_comm, balancer->get_dimension(), proc_grid_dim.data(), periods.data(), 0, &communicator);
+            balancer->set_communicator(communicator);
         }
         else
         {
@@ -459,141 +400,60 @@ template <class T, class W> void ALL<T,W>::set_communicator(MPI_Comm comm)
     }
 }
 
-template <class T, class W> void ALL<T,W>::get_work(W& result)
+template <class T, class W> void ALL<T,W>::calculate_outline()
 {
-    result = work_array->at(0);
+    // calculation only possible if there are vertices defining the domain
+    if (balancer->get_vertices().size() > 0)
+    {
+        outline.resize(2);
+        // initialise both outline corners with the first vertex; they are
+        // expanded below by comparing against all remaining vertices
+        outline.at(0) = balancer->get_vertices().at(0);
+        outline.at(1) = balancer->get_vertices().at(0);
+        // compare value of each outline point with all vertices to find the maximum
+        // extend of the domain in each dimension
+        for (int i = 1; i < balancer->get_vertices().size(); ++i)
+        {
+            ALL_Point<T> p = balancer->get_vertices().at(i);
+            for( int j = 0; j < balancer->get_dimension(); ++j)
+            {
+                outline.at(0).set_coordinate(j, std::min(outline.at(0).x(j),p.x(j)));
+                outline.at(1).set_coordinate(j, std::max(outline.at(1).x(j),p.x(j)));
+            }
+        }
+    }
 }
 
-template <class T, class W> void ALL<T,W>::get_work(std::vector<W>& result)
+template <class T, class W> void ALL<T,W>::get_work(W& result)
 {
-    result = work_array;
+    result = balancer->get_work().at(0);
 }
 
-template <class T, class W> void ALL<T,W>::create_balancer(ALL_LB_t method)
+template <class T, class W> void ALL<T,W>::get_work(std::vector<W>& result)
 {
-    try
-    {
-        switch(method)
-        {
-            case ALL_LB_t::TENSOR:
-                balancer = new ALL_Tensor_LB<T,W>(dimension,work_array->at(0),gamma);
-                break;
-            case ALL_LB_t::STAGGERED:
-                balancer = new ALL_Staggered_LB<T,W>(dimension,work_array->at(0),gamma);
-                break;
-            case ALL_LB_t::UNSTRUCTURED:
-            	balancer = new ALL_Unstructured_LB<T,W>(dimension,work_array->at(0),gamma);
-                break;
-            case ALL_LB_t::VORONOI:
-#ifdef ALL_VORONOI                
-            	balancer = new ALL_Voronoi_LB<T,W>(dimension,work_array->at(0),gamma);
-#endif
-                break;
-            case ALL_LB_t::HISTOGRAM:
-                balancer = new ALL_Histogram_LB<T,W>(dimension,work_array->at(0),gamma);
-                break;
-            default:
-                throw ALL_Invalid_Argument_Exception(
-                                __FILE__,
-                                __func__,
-                                __LINE__,
-                                "Unknown type of loadbalancing passed.");
-                break;
-        }
-    }
-    catch (ALL_Custom_Exception e)
-    {
-        std::cout << e.what() << std::endl;
-        MPI_Abort(MPI_COMM_WORLD,-1);
-    }
+    result = balancer->get_work();
 }
 
-template <class T, class W> void ALL<T,W>::setup(ALL_LB_t method)
+template <class T, class W> void ALL<T,W>::setup()
 {
     try
     {
-        switch(method)
-        {
-            case ALL_LB_t::TENSOR:
-                if (balancer) delete (ALL_Tensor_LB<T,W>*)balancer;
-                balancer = new ALL_Tensor_LB<T,W>(dimension,work_array->at(0),gamma);
-                ((ALL_Tensor_LB<T,W>*)balancer)->set_vertices(outline->data());
-                ((ALL_Tensor_LB<T,W>*)balancer)->setup(communicator);
-                break;
-            case ALL_LB_t::STAGGERED:
-                if (balancer) delete (ALL_Staggered_LB<T,W>*)balancer;
-                balancer = new ALL_Staggered_LB<T,W>(dimension,work_array->at(0),gamma);
-                ((ALL_Staggered_LB<T,W>*)balancer)->set_vertices(outline->data());
-                ((ALL_Staggered_LB<T,W>*)balancer)->setup(communicator);
-                break;
-            case ALL_LB_t::UNSTRUCTURED:
-                if (balancer) delete (ALL_Unstructured_LB<T,W>*)balancer;
-            	balancer = new ALL_Unstructured_LB<T,W>(dimension,work_array->at(0),gamma);
-                ((ALL_Unstructured_LB<T,W>*)balancer)->set_vertices(vertices);
-                ((ALL_Unstructured_LB<T,W>*)balancer)->setup(communicator);
-                break;
-            case ALL_LB_t::VORONOI:
-#ifdef ALL_VORONOI
-                if (balancer) delete (ALL_Voronoi_LB<T,W>*)balancer;
-                balancer = new ALL_Voronoi_LB<T,W>(dimension,work_array->at(0),gamma);
-                ((ALL_Voronoi_LB<T,W>*)balancer)->set_vertices(vertices);
-                ((ALL_Voronoi_LB<T,W>*)balancer)->setup(communicator);
-#endif                
-                break;
-            case ALL_LB_t::HISTOGRAM:
-                if (balancer) delete (ALL_Histogram_LB<T,W>*)balancer;
-                balancer = new ALL_Histogram_LB<T,W>(dimension,work_array,gamma);
-                ((ALL_Histogram_LB<T,W>*)balancer)->set_vertices(outline->data());
-                ((ALL_Histogram_LB<T,W>*)balancer)->setup(communicator);
-                break;
-            default:
-                throw ALL_Invalid_Argument_Exception(
-                                        __FILE__,
-                                        __func__,
-                                        __LINE__,
-                                        "Unknown type of loadbalancing passed.");
-                break;
-        }
+        balancer->setup();
     }
     catch (ALL_Custom_Exception e)
     {
         std::cout << e.what() << std::endl;
         MPI_Abort(MPI_COMM_WORLD,-1);
     }
-    load_balancer_type = method;
 }
 
-template <class T, class W> void ALL<T,W>::setup_balancer(ALL_LB_t method)
+template <class T, class W> void ALL<T,W>::setup_balancer()
 {
     try
     {
-        switch(method)
-        {
-            case ALL_LB_t::TENSOR:
-                ((ALL_Tensor_LB<T,W>*)balancer)->setup(communicator);
-                break;
-            case ALL_LB_t::STAGGERED:
-                ((ALL_Staggered_LB<T,W>*)balancer)->setup(communicator);
-                break;
-            case ALL_LB_t::UNSTRUCTURED:
-                ((ALL_Unstructured_LB<T,W>*)balancer)->setup(communicator);
-                break;
-            case ALL_LB_t::VORONOI:
-#ifdef ALL_VORONOI
-                ((ALL_Voronoi_LB<T,W>*)balancer)->setup(communicator);
-#endif
-                break;
-            case ALL_LB_t::HISTOGRAM:
-                ((ALL_Histogram_LB<T,W>*)balancer)->setup(communicator);
-                break;
-            default:
-                throw ALL_Invalid_Argument_Exception(
-                                        __FILE__,
-                                        __func__,
-                                        __LINE__,
-                                        "Unknown type of loadbalancing passed.");
-                break;
-        }
+        balancer->setup(communicator);
     }
     catch (ALL_Custom_Exception e)
     {
@@ -602,148 +462,97 @@ template <class T, class W> void ALL<T,W>::setup_balancer(ALL_LB_t method)
     }
 }
 
-template <class T, class W> void ALL<T,W>::balance(ALL_LB_t method, bool internal)
+template <class T, class W> std::vector<ALL_Point<T>>& ALL<T,W>::balance(bool internal)
 {
     try
     {
-        n_vertices = vertices.size();
-        /*
-    	n_vertices = 2;
-        if (method == ALL_LB_t::UNSTRUCTURED)
-        {
-        	n_vertices = 8;
-        }
-        */
-    	std::vector<T> result(n_vertices*dimension);
+        n_vertices = balancer->get_vertices().size();
         switch(method)
         {
             case ALL_LB_t::TENSOR:
-                ((ALL_Tensor_LB<T,W>*)balancer)->set_vertices(outline->data());
-                result_vertices = vertices;
-                ((ALL_Tensor_LB<T,W>*)balancer)->balance();
-
-                ((ALL_Tensor_LB<T,W>*)balancer)->get_shifted_vertices(result);
-
-                for (int i = 0; i < result_vertices.size(); ++i)
-                {
-                    for (int d = 0; d < result_vertices.at(i).get_dimension(); ++d)
-                    {
-                        result_vertices.at(i).set_coordinate(
-                                d,
-                                result.at(i*result_vertices.at(i).get_dimension()+d)
-                                );
-                    }
-                }
-                
+                balancer->balance(loadbalancing_step);
                 break;
             case ALL_LB_t::STAGGERED:
+                
+                /* ToDo: Check if repetition is useful at all and
+                 *       change it to be included for all methods,
+                 *       not only STAGGERED */
+                /*
+                // estimation to improve speed of convergence
+                // changing vertices during estimation
+                std::vector<ALL_Point<T>> tmp_vertices = balancer->get_vertices();
+                // saved original vertices
+                std::vector<ALL_Point<T>> old_vertices = balancer->get_vertices();
+                // old temporary vertices
+                std::vector<ALL_Point<T>> old_tmp_vertices = balancer->get_vertices();
+                // result temporary vertices
+                std::vector<ALL_Point<T>> result_vertices = balancer->get_vertices();
+                // temporary work
+                W tmp_work = balancer->get_work().at(0);
+                // old temporary work
+                W old_tmp_work = balancer->get_work().at(0);
+
+                W max_work;
+                W sum_work;
+                double d_max;
+                int n_ranks;
+
+                // compute work density on local domain
+                double V = 1.0;
+                for (int i = 0; i < balancer->get_dimension(); ++i)
+                    V *= ( outline.at(1).x(i) - outline.at(0).x(i) );
+                double rho = work_array.at(0) / V;
+
+                // collect maximum work in system
+                MPI_Allreduce(work_array.data(), &max_work, 1, mpi_data_type_W, MPI_MAX, communicator);
+                MPI_Allreduce(work_array.data(), &sum_work, 1, mpi_data_type_W, MPI_SUM, communicator);
+                MPI_Comm_size(communicator,&n_ranks);
+                d_max = (double)(max_work) * (double)n_ranks / (double)sum_work - 1.0;
+                
+
+                for (int i = 0; i < estimation_limit && d_max > 0.1 && !internal; ++i)
                 {
-                    // estimation to improve speed of convergence
-                    // changing vertices during estimation
-                    std::vector<ALL_Point<T>> tmp_vertices = vertices;
-                    // saved original vertices
-                    std::vector<ALL_Point<T>> old_vertices = vertices;
-                    // old temporary vertices
-                    std::vector<ALL_Point<T>> old_tmp_vertices = vertices;
-                    // temporary work
-                    W tmp_work = work_array->at(0);
-                    // old temporary work
-                    W old_tmp_work = work_array->at(0);
-
-                    W max_work;
-                    W sum_work;
-                    double d_max;
-                    int n_ranks;
-
-                    // compute work density on local domain
-                    double V = 1.0;
-                    for (int i = 0; i < dimension; ++i)
-                        V *= ( outline->at(3+i) - outline->at(i) );
-                    double rho = work_array->at(0) / V;
-
-                    // collect maximum work in system
-                    MPI_Allreduce(work_array->data(), &max_work, 1, mpi_data_type_W, MPI_MAX, communicator);
-                    MPI_Allreduce(work_array->data(), &sum_work, 1, mpi_data_type_W, MPI_SUM, communicator);
-                    MPI_Comm_size(communicator,&n_ranks);
-                    d_max = (double)(max_work) * (double)n_ranks / (double)sum_work - 1.0;
-                    
-
-                    for (int i = 0; i < estimation_limit && d_max > 0.1 && !internal; ++i)
+                    balancer->set_work(tmp_work);
+                    balancer->setup();
+                    balancer->balance(true);
+                    bool sane = true;
+                    // check if the estimated boundaries are not too deformed
+                    for (int j = 0; j < balancer->get_dimension(); ++j)
+                        sane = sane && (result_vertices.at(0).x(j) <= old_vertices.at(1).x(j));
+                    MPI_Allreduce(MPI_IN_PLACE,&sane,1,MPI_CXX_BOOL,MPI_LAND,communicator);
+                    if (sane)
                     {
-                        work_array->at(0) = tmp_work;
-                        setup(method);
-                        balance(method,true);
-                        bool sane = true;
-                        // check if the estimated boundaries are not too deformed
-                        for (int j = 0; j < dimension; ++j)
-                            sane = sane && (result_vertices.at(0).x(j) <= old_vertices.at(1).x(j));
-                        MPI_Allreduce(MPI_IN_PLACE,&sane,1,MPI_CXX_BOOL,MPI_LAND,communicator);
-                        if (sane)
-                        {
-                            old_tmp_vertices = tmp_vertices;
-                            tmp_vertices = result_vertices;
-                            V = 1.0;
-                            for (int i = 0; i < dimension; ++i)
-                                V *= ( tmp_vertices.at(1).x(i) - tmp_vertices.at(0).x(i) );
-                            old_tmp_work = tmp_work;
-                            tmp_work = rho * V;
-                        }
-                        else if (!sane || i == estimation_limit - 1)
-                        {
-                            vertices = old_tmp_vertices;
-                            work_array->at(0) = old_tmp_work;
-                            calculate_outline();
-                            i = estimation_limit;
-                        }
+                        old_tmp_vertices = tmp_vertices;
+                        tmp_vertices = result_vertices;
+                        V = 1.0;
+                        for (int i = 0; i < balancer->get_dimension(); ++i)
+                            V *= ( tmp_vertices.at(1).x(i) - tmp_vertices.at(0).x(i) );
+                        old_tmp_work = tmp_work;
+                        tmp_work = rho * V;
                     }
-
-                    ((ALL_Staggered_LB<T,W>*)balancer)->set_vertices(outline->data());
-                    result_vertices = vertices;
-                    ((ALL_Staggered_LB<T,W>*)balancer)->balance();
-
-                    ((ALL_Staggered_LB<T,W>*)balancer)->get_shifted_vertices(result);
-                    for (int i = 0; i < result_vertices.size(); ++i)
+                    else if (!sane || i == estimation_limit - 1)
                     {
-                        for (int d = 0; d < result_vertices.at(i).get_dimension(); ++d)
-                        {
-                            result_vertices.at(i).set_coordinate(
-                                    d,
-                                    result.at(i*result_vertices.at(i).get_dimension()+d)
-                                    );
-                        }
+                        balancer->get_vertices() = old_tmp_vertices;
+                        work_array->at(0) = old_tmp_work;
+                        calculate_outline();
+                        i = estimation_limit;
                     }
                 }
+
+                ((ALL_Staggered_LB<T,W>*)balancer.get())->set_vertices(outline->data());
+                */
+                balancer->balance(loadbalancing_step);
                 break;
             case ALL_LB_t::UNSTRUCTURED:
-            	((ALL_Unstructured_LB<T,W>*)balancer)->set_vertices(vertices);
-                result_vertices = vertices;
-                ((ALL_Unstructured_LB<T,W>*)balancer)->balance();
-                ((ALL_Unstructured_LB<T,W>*)balancer)->get_shifted_vertices(result_vertices);
+                balancer->balance(loadbalancing_step);
                 break;
             case ALL_LB_t::VORONOI:
 #ifdef ALL_VORONOI
-            	((ALL_Voronoi_LB<T,W>*)balancer)->set_vertices(vertices);
-            	((ALL_Voronoi_LB<T,W>*)balancer)->set_step(loadbalancing_step);
-                result_vertices = vertices;
-                ((ALL_Voronoi_LB<T,W>*)balancer)->balance();
-                ((ALL_Voronoi_LB<T,W>*)balancer)->get_shifted_vertices(result_vertices);
+                balancer->balance(loadbalancing_step);
 #endif
                 break;
             case ALL_LB_t::HISTOGRAM:
-            	((ALL_Histogram_LB<T,W>*)balancer)->set_vertices(outline->data());
-                result_vertices = vertices;
-                ((ALL_Histogram_LB<T,W>*)balancer)->balance(loadbalancing_step);
-                ((ALL_Histogram_LB<T,W>*)balancer)->get_shifted_vertices(result);
-                for (int i = 0; i < result_vertices.size(); ++i)
-                {
-                    for (int d = 0; d < result_vertices.at(i).get_dimension(); ++d)
-                    {
-                        result_vertices.at(i).set_coordinate(
-                                d,
-                                result.at(i*result_vertices.at(i).get_dimension()+d)
-                                );
-                    }
-                }
+                balancer->balance(loadbalancing_step);
                 break;
             default:
                 throw ALL_Invalid_Argument_Exception(
@@ -751,7 +560,6 @@ template <class T, class W> void ALL<T,W>::balance(ALL_LB_t method, bool interna
                                         __func__,
                                         __LINE__,
                                         "Unknown type of loadbalancing passed.");
-                break;
         }
         loadbalancing_step++;
     }
@@ -760,79 +568,16 @@ template <class T, class W> void ALL<T,W>::balance(ALL_LB_t method, bool interna
         std::cout << e.what() << std::endl;
         MPI_Abort(MPI_COMM_WORLD,-1);
     }
-}
+    return balancer->get_shifted_vertices();
 
-template <class T, class W> void ALL<T,W>::cleanup(ALL_LB_t method)
-{
-    try
-    {
-        switch(method)
-        {
-            case ALL_LB_t::TENSOR:
-                delete((ALL_Tensor_LB<T,W>*)balancer);
-                break;
-            case ALL_LB_t::STAGGERED:
-                delete((ALL_Staggered_LB<T,W>*)balancer);
-                break;
-            case ALL_LB_t::UNSTRUCTURED:
-                delete((ALL_Unstructured_LB<T,W>*)balancer);
-                break;
-            case ALL_LB_t::VORONOI:
-#ifdef ALL_VORONOI
-                delete((ALL_Voronoi_LB<T,W>*)balancer);
-#endif
-                break;
-            case ALL_LB_t::HISTOGRAM:
-                delete((ALL_Histogram_LB<T,W>*)balancer);
-                break;
-            default:
-                throw ALL_Invalid_Argument_Exception(
-                                        __FILE__,
-                                        __func__,
-                                        __LINE__,
-                                        "Unknown type of loadbalancing passed.");
-                break;
-        }
-    }
-    catch(ALL_Custom_Exception e)
-    {
-        std::cout << e.what() << std::endl;
-        MPI_Abort(MPI_COMM_WORLD,-1);
-    }
 }
 
-template <class T, class W> int ALL<T,W>::get_n_neighbors(ALL_LB_t method)
+template <class T, class W> int ALL<T,W>::get_n_neighbors()
 {
     std::vector<int> neig;
     try
     {
-        switch(method)
-        {
-            case ALL_LB_t::TENSOR:
-                ((ALL_Tensor_LB<T,W>*)balancer)->get_neighbors(neig);
-                break;
-            case ALL_LB_t::STAGGERED:
-                ((ALL_Staggered_LB<T,W>*)balancer)->get_neighbors(neig);
-                break;
-            case ALL_LB_t::UNSTRUCTURED:
-                ((ALL_Unstructured_LB<T,W>*)balancer)->get_neighbors(neig);
-                break;
-            case ALL_LB_t::VORONOI:
-#ifdef ALL_VORONOI
-                ((ALL_Voronoi_LB<T,W>*)balancer)->get_neighbors(neig);
-#endif
-                break;
-            case ALL_LB_t::HISTOGRAM:
-                ((ALL_Histogram_LB<T,W>*)balancer)->get_neighbors(neig);
-                break;
-            default:
-                throw ALL_Invalid_Argument_Exception(
-                                        __FILE__,
-                                        __func__,
-                                        __LINE__,
-                                        "Unknown type of loadbalancing passed.");
-                break;
-        }
+        balancer->get_neighbors(neig);
     }
     catch(ALL_Custom_Exception e)
     {
@@ -842,7 +587,7 @@ template <class T, class W> int ALL<T,W>::get_n_neighbors(ALL_LB_t method)
     return neig.size();
 }
 
-template <class T, class W> void ALL<T,W>::get_neighbor_vertices(ALL_LB_t method, std::vector<T>& nv)
+template <class T, class W> void ALL<T,W>::get_neighbor_vertices(std::vector<T>& nv)
 {
     switch(method)
     {
@@ -854,7 +599,7 @@ template <class T, class W> void ALL<T,W>::get_neighbor_vertices(ALL_LB_t method
             break;
         case ALL_LB_t::VORONOI:
 #ifdef ALL_VORONOI
-            ((ALL_Voronoi_LB<T,W>*)balancer)->get_neighbor_vertices(nv);
+            ((ALL_Voronoi_LB<T,W>*)balancer.get())->get_neighbor_vertices(nv);
 #endif
             break;
         case ALL_LB_t::HISTOGRAM:
@@ -864,37 +609,11 @@ template <class T, class W> void ALL<T,W>::get_neighbor_vertices(ALL_LB_t method
     }
 }
 
-template <class T, class W> void ALL<T,W>::set_min_domain_size(ALL_LB_t method, T* min_size)
+template <class T, class W> void ALL<T,W>::set_min_domain_size(const std::vector<T>& min_size)
 {
     try
     {
-        switch(method)
-        {
-            case ALL_LB_t::TENSOR:
-                //((ALL_Tensor_LB<T,W>*)balancer)->get_neighbors(neig);
-                break;
-            case ALL_LB_t::STAGGERED:
-                ((ALL_Staggered_LB<T,W>*)balancer)->set_min_domain_size(min_size);
-                break;
-            case ALL_LB_t::UNSTRUCTURED:
-                //((ALL_Unstructured_LB<T,W>*)balancer)->get_neighbors(neig);
-                break;
-            case ALL_LB_t::VORONOI:
-#ifdef ALL_VORONOI
-                //((ALL_Voronoi_LB<T,W>*)balancer)->get_neighbors(neig);
-#endif
-                break;
-            case ALL_LB_t::HISTOGRAM:
-                //((ALL_Histogram_LB<T,W>*)balancer)->get_neighbors(neig);
-                break;
-            default:
-                throw ALL_Invalid_Argument_Exception(
-                                        __FILE__,
-                                        __func__,
-                                        __LINE__,
-                                        "Unknown type of loadbalancing passed.");
-                break;
-        }
+        balancer->set_min_domain_size(min_size);
     }
     catch(ALL_Custom_Exception e)
     {
@@ -903,37 +622,11 @@ template <class T, class W> void ALL<T,W>::set_min_domain_size(ALL_LB_t method,
     }
 }
 
-template <class T, class W> void ALL<T,W>::get_neighbors(ALL_LB_t method, std::vector<int>& neig)
+template <class T, class W> void ALL<T,W>::get_neighbors(std::vector<int>& neig)
 {
     try
     {
-        switch(method)
-        {
-            case ALL_LB_t::TENSOR:
-                ((ALL_Tensor_LB<T,W>*)balancer)->get_neighbors(neig);
-                break;
-            case ALL_LB_t::STAGGERED:
-                ((ALL_Staggered_LB<T,W>*)balancer)->get_neighbors(neig);
-                break;
-            case ALL_LB_t::UNSTRUCTURED:
-                ((ALL_Unstructured_LB<T,W>*)balancer)->get_neighbors(neig);
-                break;
-            case ALL_LB_t::VORONOI:
-#ifdef ALL_VORONOI
-                ((ALL_Voronoi_LB<T,W>*)balancer)->get_neighbors(neig);
-#endif
-                break;
-            case ALL_LB_t::HISTOGRAM:
-                ((ALL_Histogram_LB<T,W>*)balancer)->get_neighbors(neig);
-                break;
-            default:
-                throw ALL_Invalid_Argument_Exception(
-                                        __FILE__,
-                                        __func__,
-                                        __LINE__,
-                                        "Unknown type of loadbalancing passed.");
-                break;
-        }
+        balancer->get_neighbors(neig);
     }
     catch(ALL_Custom_Exception e)
     {
@@ -942,37 +635,11 @@ template <class T, class W> void ALL<T,W>::get_neighbors(ALL_LB_t method, std::v
     }
 }
 
-template <class T, class W> void ALL<T,W>::get_neighbors(ALL_LB_t method, int** neig)
+template <class T, class W> void ALL<T,W>::get_neighbors(int** neig)
 {
     try
     {
-        switch(method)
-        {
-            case ALL_LB_t::TENSOR:
-                ((ALL_Tensor_LB<T,W>*)balancer)->get_neighbors(neig);
-                break;
-            case ALL_LB_t::STAGGERED:
-                ((ALL_Staggered_LB<T,W>*)balancer)->get_neighbors(neig);
-                break;
-            case ALL_LB_t::UNSTRUCTURED:
-                ((ALL_Unstructured_LB<T,W>*)balancer)->get_neighbors(neig);
-                break;
-            case ALL_LB_t::VORONOI:
-#ifdef ALL_VORONOI
-                ((ALL_Voronoi_LB<T,W>*)balancer)->get_neighbors(neig);
-#endif
-                break;
-            case ALL_LB_t::HISTOGRAM:
-                ((ALL_Histogram_LB<T,W>*)balancer)->get_neighbors(neig);
-                break;
-            default:
-                throw ALL_Invalid_Argument_Exception(
-                                        __FILE__,
-                                        __func__,
-                                        __LINE__,
-                                        "Unknown type of loadbalancing passed.");
-                break;
-        }
+        balancer->get_neighbors(neig);
     }
     catch(ALL_Custom_Exception e)
     {
@@ -981,53 +648,14 @@ template <class T, class W> void ALL<T,W>::get_neighbors(ALL_LB_t method, int**
     }
 }
 
-template <class T, class W> void ALL<T,W>::set_sys_size(ALL_LB_t method, std::vector<T>& s_size)
+template <class T, class W> void ALL<T,W>::set_sys_size(std::vector<T>& s_size)
 {
-    switch(method)
-    {
-        case ALL_LB_t::TENSOR:
-            break;
-        case ALL_LB_t::STAGGERED:
-            break;
-        case ALL_LB_t::UNSTRUCTURED:
-            break;
-        case ALL_LB_t::VORONOI:
-#ifdef ALL_VORONOI
-            ((ALL_Voronoi_LB<T,W>*)balancer)->set_sys_size(s_size);
-#endif
-            break;
-        case ALL_LB_t::HISTOGRAM:
-            ((ALL_Histogram_LB<T,W>*)balancer)->set_sys_size(s_size);
-            break;
-        default:
-            throw ALL_Invalid_Argument_Exception(
-                                        __FILE__,
-                                        __func__,
-                                        __LINE__,
-                                        "Unknown type of loadbalancing passed.");
-            break;
-    }
-
+    balancer->set_sys_size(s_size);
 }
 
-template <class T, class W> void ALL<T,W>::set_method_data(ALL_LB_t method, void* data)
+template <class T, class W> void ALL<T,W>::set_method_data(void* data)
 {
-    switch(method)
-    {
-        case ALL_LB_t::TENSOR:
-            break;
-        case ALL_LB_t::STAGGERED:
-            break;
-        case ALL_LB_t::UNSTRUCTURED:
-            break;
-#ifdef ALL_VORONOI
-        case ALL_LB_t::VORONOI:
-            break;
-#endif        
-        case ALL_LB_t::HISTOGRAM:
-            ((ALL_Histogram_LB<T,W>*)balancer)->set_data(data);
-            break;
-    }
+    balancer->set_data(data);
 }
 
 #ifdef ALL_VTK_OUTPUT
@@ -1036,7 +664,7 @@ template <class T, class W> void ALL<T,W>::print_vtk_outlines(int step)
     // define grid points, i.e. vertices of local domain
     auto points = vtkSmartPointer<vtkPoints>::New();
     // allocate space for eight points (describing the cuboid around the domain)
-    points->Allocate(8 * dimension);
+    points->Allocate(8 * balancer->get_dimension());
 
     int n_ranks;
     int local_rank;
@@ -1053,8 +681,8 @@ template <class T, class W> void ALL<T,W>::print_vtk_outlines(int step)
     MPI_Comm_size(communicator,&n_ranks);
 
     std::vector<ALL_Point<W>> tmp_outline(2);
-    tmp_outline.at(0) = ALL_Point<W>(3,outline->data());
-    tmp_outline.at(1) = ALL_Point<W>(3,outline->data()+3);
+    tmp_outline.at(0) = ALL_Point<W>(3,{outline.at(0).x(0),outline.at(0).x(1),outline.at(0).x(2)});
+    tmp_outline.at(1) = ALL_Point<W>(3,{outline.at(1).x(0),outline.at(1).x(1),outline.at(1).x(2)});
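+    // the two outline corners are expanded into the eight vertices of the domain cuboid below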
 
     for (auto z = 0; z <= 1; ++z)
         for (auto y = 0; y <= 1; ++y)
@@ -1078,7 +706,7 @@ template <class T, class W> void ALL<T,W>::print_vtk_outlines(int step)
     work->SetNumberOfComponents(1);
     work->SetNumberOfTuples(1);
     work->SetName("work");
-    W total_work = std::accumulate(work_array->begin(),work_array->end(),(W)0);
+    W total_work = std::accumulate(balancer->get_work().begin(),balancer->get_work().end(),(W)0);
     work->SetValue(0, total_work);
 
     // determine extent of system
@@ -1092,8 +720,8 @@ template <class T, class W> void ALL<T,W>::print_vtk_outlines(int step)
 
     for(int i = 0; i < 3; ++i)
     {
-        local_min[i] = outline->at(i);
-        local_max[i] = outline->at(3+i);
+        local_min[i] = outline.at(0).x(i);
+        local_max[i] = outline.at(1).x(i);
         width[i] = local_max[i] - local_min[i];
         volume *= width[i];
     } 
@@ -1173,26 +801,26 @@ template <class T, class W> void ALL<T,W>::print_vtk_vertices(int step)
 
         // local vertices
         // (vertices + work)
-        T local_vertices[n_vertices * dimension + 1];
+        T local_vertices[n_vertices * balancer->get_dimension() + 1];
 
         for (int v = 0; v < n_vertices; ++v)
         {
-            for (int d = 0; d < dimension; ++d)
+            for (int d = 0; d < balancer->get_dimension(); ++d)
             {
-                local_vertices[v * dimension + d] = vertices.at(v).x(d);
+                local_vertices[v * balancer->get_dimension() + d] = balancer->get_vertices().at(v).x(d);
             }
         }
-        local_vertices[n_vertices * dimension] = (T)work_array->at(0);
+        local_vertices[n_vertices * balancer->get_dimension()] = (T)balancer->get_work().at(0);
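+        // the last entry carries the local work value so that vertices and work
+        // can be collected together in a single MPI_Gather below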
 
         T* global_vertices;
         if (local_rank == 0)
         {
-            global_vertices = new T[n_ranks * (n_vertices * dimension + 1)];
+            global_vertices = new T[n_ranks * (n_vertices * balancer->get_dimension() + 1)];
         }
 
         // collect all works and vertices on a single process
-        MPI_Gather(local_vertices, n_vertices * dimension + 1, mpi_data_type_T,
-                   global_vertices,n_vertices * dimension + 1, mpi_data_type_T,
+        MPI_Gather(local_vertices, n_vertices * balancer->get_dimension() + 1, mpi_data_type_T,
+                   global_vertices,n_vertices * balancer->get_dimension() + 1, mpi_data_type_T,
                    0, communicator
                   );
 
@@ -1207,9 +835,9 @@ template <class T, class W> void ALL<T,W>::print_vtk_vertices(int step)
                 for (int v = 0; v < n_vertices; ++v)
                 {
                     vtkpoints->InsertNextPoint(
-                        global_vertices[ i * (n_vertices * dimension + 1) + v * dimension + 0 ],
-                        global_vertices[ i * (n_vertices * dimension + 1) + v * dimension + 1 ],
-                        global_vertices[ i * (n_vertices * dimension + 1) + v * dimension + 2 ]
+                        global_vertices[ i * (n_vertices * balancer->get_dimension() + 1) + v * balancer->get_dimension() + 0 ],
+                        global_vertices[ i * (n_vertices * balancer->get_dimension() + 1) + v * balancer->get_dimension() + 1 ],
+                        global_vertices[ i * (n_vertices * balancer->get_dimension() + 1) + v * balancer->get_dimension() + 2 ]
                             );
                 }
             }
@@ -1266,8 +894,8 @@ template <class T, class W> void ALL<T,W>::print_vtk_vertices(int step)
 
                 unstructuredGrid->InsertNextCell(VTK_POLYHEDRON, 8, pointIds,
                                                  12, faces->GetPointer());
-                work->SetValue(n,global_vertices[ n * (n_vertices * dimension + 1) + 
-                                                  8 * dimension ]);
+                work->SetValue(n,global_vertices[ n * (n_vertices * balancer->get_dimension() + 1) + 
+                                                  8 * balancer->get_dimension() ]);
                 cell->SetValue(n,(T)n);
             }
             unstructuredGrid->GetCellData()->AddArray(work);
diff --git a/include/ALL_Histogram.hpp b/include/ALL_Histogram.hpp
index 8f2213e69bee9b68a96a69157a1bd6deebec3ff8..6e9edd77aad9a1c22571f89a1654708825e8767b 100644
--- a/include/ALL_Histogram.hpp
+++ b/include/ALL_Histogram.hpp
@@ -34,96 +34,53 @@ POSSIBILITY OF SUCH DAMAGE.
 #include <mpi.h>
 #include <exception>
 #include <vector>
+#include "ALL_LB.hpp"
 #include "ALL_CustomExceptions.hpp"
 
 // T: data type of vertices used in the balancer
 // W: data type of the work used in the balancer
-template <class T, class W> class ALL_Histogram_LB
+template <class T, class W> class ALL_Histogram_LB : public ALL_LB<T,W>
 {
     public:
         ALL_Histogram_LB() {}
-        ALL_Histogram_LB(int d, std::vector<W>* w, T g) : dimension(d),
-                                         work(*w), 
-                                         gamma(g)
+        ALL_Histogram_LB(int d, std::vector<W> w, T g) : 
+            ALL_LB<T,W>(d, g)
         {
+            this->set_work(w);
             // need the lower and upper bounds
             // of the domain for the tensor-based
             // load-balancing scheme
-            vertices = new std::vector<T>(2*dimension);
-            shifted_vertices = new std::vector<T>(2*dimension);
-            // periodicity of the MPI communicator / system
-            periodicity = new int[dimension];
-
+            
             // array of MPI communicators for each direction (to collect work
             // on each plane)
-            communicators = new MPI_Comm[2*dimension];
-            n_neighbors = new std::vector<int>(2*dimension);
-            sys_size.resize(2*dimension);
-            n_bins = new std::vector<int>(dimension);
+            communicators.resize(2*this->dimension);
+            n_neighbors.resize(2*this->dimension);
+            n_bins.resize(this->dimension);
         }
 
-        ~ALL_Histogram_LB(); 
-
-        void set_vertices(T*);
+        ~ALL_Histogram_LB() = default;
 
         // setup communicators for the exchange of work in each domain
-        void setup(MPI_Comm);
+        void setup() override;
 
         // do a load-balancing step
-        virtual void balance(int);
+        void balance(int) override;
 
         // getter for variables (passed by reference to avoid
         // direct access to private members of the object)
 
-        // getter for base vertices
-        void get_vertices(T*);
-        // getter for shifted vertices
-        void get_shifted_vertices(T*);
-        // getter for shifted vertices
-        void get_shifted_vertices(std::vector<T>&);
-
         // neighbors
         // provide list of neighbors
-        virtual void get_neighbors(std::vector<int>&);
+        void get_neighbors(std::vector<int>&) override;
         // provide list of neighbors in each direction
-        virtual void get_neighbors(int**);
-
-        // set system size for computation of histogram slice thickness
-        void set_sys_size(std::vector<T>&);
+        void get_neighbors(int**) override;
 
         // set required data for the method:
         // int, int, int (n_bins for each direction)
-        void set_data(void*);
+        virtual void set_data(void*) override;
 
     private:
-        // dimension of the system
-        int dimension;
-
-        // shift cooeficient
-        T gamma;
-
-        // work distribution
-        std::vector<W> work;
-
-        // vertices
-        std::vector<T>* vertices; 
-
-        // vertices after load-balancing shift
-        std::vector<T>* shifted_vertices; 
-
-        // MPI values
-        // global communicator
-        MPI_Comm global_comm;
-        // rank of the local domain in the communicator
-        int local_rank;
-        // global dimension of the system in each coordinate
-        int* global_dims;
-        // coordinates of the local domain in the system
-        int* local_coords;
-        // periodicity of the MPI communicator / system
-        int* periodicity;
-        
-        std::vector<int>* n_bins;
+        std::vector<int> n_bins;
 
         // type for MPI communication
         MPI_Datatype mpi_data_type_T;
@@ -131,93 +88,38 @@ template <class T, class W> class ALL_Histogram_LB
 
         // array of MPI communicators for each direction (to collect work
         // on each plane)
-        MPI_Comm* communicators;
+        std::vector<MPI_Comm> communicators;
 
         // list of neighbors
         std::vector<int> neighbors;
-        std::vector<int>* n_neighbors;
-
-        // size of system
-        std::vector<T> sys_size;
+        std::vector<int> n_neighbors;
 
         // find neighbors (more sophisticated than for tensor-product
         // variant of LB)
         void find_neighbors();
 };
 
-template <class T, class W> ALL_Histogram_LB<T,W>::~ALL_Histogram_LB()
-{
-    if (vertices) delete vertices;
-    if (shifted_vertices) delete shifted_vertices;
-    if (global_dims) delete[] global_dims;
-    if (local_coords) delete[] local_coords;
-    if (periodicity) delete[] periodicity;
-    if (communicators) delete[] communicators;
-    if (n_neighbors) delete n_neighbors;
-    if (n_bins) delete n_bins;
-}
-
-template <class T, class W> void ALL_Histogram_LB<T,W>::get_vertices(T* result)
-{
-    for (int i = 0; i < 2*dimension; ++i)
-    {
-        result[i] = vertices->at(i);
-    }
-}
-
-template <class T, class W> void ALL_Histogram_LB<T,W>::get_shifted_vertices(T* result)
-{
-    for (int i = 0; i < 2*dimension; ++i)
-    {
-        result[i] = shifted_vertices->at(i);
-    }
-}
-
-template <class T, class W> void ALL_Histogram_LB<T,W>::get_shifted_vertices(std::vector<T>& result)
-{
-    for (int i = 0; i < 2*dimension; ++i)
-    {
-        result.at(i) = shifted_vertices->at(i);
-    }
-}
-
-// set the actual vertices (unsafe due to array copy)
-template <class T, class W> void ALL_Histogram_LB<T,W>::set_vertices(T* v)
-{
-    for (int i = 0; i < 2*dimension; ++i)
-    {
-        vertices->at(i) = v[i];
-    }
-}
-
-template <class T, class W> void ALL_Histogram_LB<T,W>::set_sys_size(std::vector<T>& ss)
-{
-    for (auto i = 0; i < sys_size.size(); ++i)
-    {
-        sys_size.at(i) = ss.at(i);
-    }
-}
-
 template <class T, class W> void ALL_Histogram_LB<T,W>::set_data(void* data)
 {
-    if (n_bins->size() < 3)
-        n_bins->resize(3);
+    if (n_bins.size() < 3)
+        n_bins.resize(3);
+    
     for (int i = 0; i < 3; ++i)
-        n_bins->at(i) = *((int*)data+i);
+        n_bins.at(i) = *((int*)data+i);
 }
 
 // setup routine for the tensor-based load-balancing scheme
 // requires: 
-//              global_comm (int): cartesian MPI communicator, from
+//              this->global_comm: cartesian MPI communicator, from
 //                                 which separate sub communicators
 //                                 are derived in order to represent
 //                                 each plane of domains in the system
-template <class T, class W> void ALL_Histogram_LB<T,W>::setup(MPI_Comm comm)
+template <class T, class W> void ALL_Histogram_LB<T,W>::setup()
 {
     int status;
 
     // check if Communicator is cartesian
-    MPI_Topo_test(comm, &status);
+    MPI_Topo_test(this->global_comm, &status);
     if (status != MPI_CART)
     {
         throw ALL_Invalid_Comm_Type_Exception(
@@ -226,20 +128,11 @@ template <class T, class W> void ALL_Histogram_LB<T,W>::setup(MPI_Comm comm)
                 __LINE__,
                 "Cartesian MPI communicator required, passed communicator is not cartesian");
     }
-    
-    // allocate arrays to correct sizes
-    global_dims = new int[dimension];
-    local_coords = new int[dimension];
-    periodicity = new int[dimension];
-
-    // store MPI communicator to local variable
-    global_comm = comm;
-
-    // get the local coordinates, periodicity and global size from the MPI communicator
-    MPI_Cart_get(global_comm, dimension, global_dims, periodicity, local_coords);
+    // get the local coordinates, periodicity, and global size from the MPI communicator
+    MPI_Cart_get(this->global_comm, this->dimension, this->global_dims.data(), this->periodicity.data(), this->local_coords.data());
 
     // get the local rank from the MPI communicator
-    MPI_Cart_rank(global_comm, local_coords, &local_rank);
+    MPI_Cart_rank(this->global_comm, this->local_coords.data(), &this->local_rank);
 
     // create sub-communicators 
 
@@ -249,16 +142,16 @@ template <class T, class W> void ALL_Histogram_LB<T,W>::setup(MPI_Comm comm)
     // reduction sub-communicators (equal to staggered grid)
 
     // z-plane
-    MPI_Comm_split(global_comm,
-                   local_coords[2],
-                   local_coords[0]+local_coords[1]*global_dims[0],
-                   &communicators[2]);
+    MPI_Comm_split(this->global_comm,
+                   this->local_coords.at(2),
+                   this->local_coords.at(0)+this->local_coords.at(1)*this->global_dims.at(0),
+                   &communicators.at(2));
 
     // y-column
-    MPI_Comm_split(global_comm,
-                   local_coords[2] * global_dims[1] +local_coords[1],
-                   local_coords[0],
-                   &communicators[1]);
+    MPI_Comm_split(this->global_comm,
+                   this->local_coords.at(2) * this->global_dims.at(1) +this->local_coords.at(1),
+                   this->local_coords.at(0),
+                   &communicators.at(1));
 
     // only cell itself
     communicators[0] = MPI_COMM_SELF;
@@ -266,23 +159,22 @@ template <class T, class W> void ALL_Histogram_LB<T,W>::setup(MPI_Comm comm)
     // gathering sub-communicators
 
     // x-gathering (same y/z coordinates)
-    MPI_Comm_split(global_comm,
-                   local_coords[1] + local_coords[2]*global_dims[1], 
-                   local_coords[0], 
-                   &communicators[3]);
+    MPI_Comm_split(this->global_comm,
+                   this->local_coords.at(1) + this->local_coords.at(2)*this->global_dims.at(1), 
+                   this->local_coords.at(0), 
+                   &communicators.at(3));
 
     // y-gathering
-    MPI_Comm_split(global_comm,
-                   local_coords[0] + local_coords[2]*global_dims[0], 
-                   local_coords[1], 
-                   &communicators[4]);
+    MPI_Comm_split(this->global_comm,
+                   this->local_coords.at(0) + this->local_coords.at(2)*this->global_dims.at(0), 
+                   this->local_coords.at(1), 
+                   &communicators.at(4));
 
     // z-gathering
-    MPI_Comm_split(global_comm,
-                   local_coords[0] + local_coords[1]*global_dims[0], 
-                   local_coords[2], 
-                   &communicators[5]);
-
+    MPI_Comm_split(this->global_comm,
+                   this->local_coords.at(0) + this->local_coords.at(1)*this->global_dims.at(0), 
+                   this->local_coords.at(2), 
+                   &communicators.at(5));
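+    // communicators 0-2 are the reduction communicators (local cell / y-column / z-plane),
+    // communicators 3-5 gather the histogram slices along the x / y / z direction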
 
     // determine correct MPI data type for template T
     if (std::is_same<T,double>::value) mpi_data_type_T = MPI_DOUBLE;
@@ -316,9 +208,9 @@ template <class T, class W> void ALL_Histogram_LB<T,W>::setup(MPI_Comm comm)
     int rank_left, rank_right;
 
     neighbors.clear();
-    for (int i = 0; i < dimension; ++i)
+    for (int i = 0; i < this->dimension; ++i)
     {
-        MPI_Cart_shift(global_comm,i,1,&rank_left,&rank_right);
+        MPI_Cart_shift(this->global_comm,i,1,&rank_left,&rank_right);
         neighbors.push_back(rank_left);
         neighbors.push_back(rank_right);
     }
@@ -340,42 +232,44 @@ template<class T, class W> void ALL_Histogram_LB<T,W>::balance(int step)
     W work_deviation;
 
     // vector to store all values of the histogram for the current dimension
-    std::vector<W> work_dimension(n_bins->at(i));
+    std::vector<W> work_dimension(n_bins.at(i));
 
     // compute total number of bins in dimension
     int n_bins_dimension;
-    MPI_Allreduce(&(n_bins->at(i)),
+    MPI_Allreduce(&(n_bins.at(i)),
             &n_bins_dimension,
             1,
             MPI_INT,
             MPI_SUM,
-            communicators[i+3]);
+            communicators.at(i+3));
 
     std::vector<W> work_collection(n_bins_dimension);
-    std::vector<int> histogram_slices(global_dims[i]);
-    std::vector<W> work_new_dimension(global_dims[i], 0.0);
+    std::vector<int> histogram_slices(this->global_dims.at(i));
+    std::vector<W> work_new_dimension(this->global_dims.at(i), 0.0);
 
     // collect how many bins from each process will be received
-    MPI_Allgather(&(n_bins->at(i)),
+    MPI_Allgather(&(n_bins.at(i)),
                   1,
                   MPI_INT,
                   histogram_slices.data(),
                   1,
                   MPI_INT,
-                  communicators[i+3]);
-
+                  communicators.at(i+3));
 
     // add up work to get total work on domain
-    for ( int n = 0; n < n_bins->at(i); ++n )
-        work_sum_local += work.at(n);
-
+    for ( int n = 0; n < n_bins.at(i); ++n )
+    {
+//        std::cout << "DEBUG: " << this->work.size() << " " << n << " " << i << " on " << this->local_rank << std::endl;
+        work_sum_local += this->work.at(n);
+    }
+        
     // compute total work in current dimension
     MPI_Allreduce(&work_sum_local, 
                   &work_sum_dimension, 
                   1, 
                   mpi_data_type_W, 
                   MPI_SUM, 
-                  communicators[i]);
+                  communicators.at(i));
 
     // compute total work for current dimension
     MPI_Allreduce(&work_sum_dimension, 
@@ -383,31 +277,31 @@ template<class T, class W> void ALL_Histogram_LB<T,W>::balance(int step)
                   1, 
                   mpi_data_type_W, 
                   MPI_SUM, 
-                  communicators[i+3]);
-
-    int avg_num = global_dims[i];
+                  communicators.at(i+3));
+    
+    int avg_num = this->global_dims.at(i);
     work_avg_dimension = work_tot_dimension / (W)avg_num;
 
     // compute local slice of the histogram
-    MPI_Allreduce(work.data(), 
+    MPI_Allreduce(this->work.data(), 
                   work_dimension.data(), 
-                  n_bins->at(i),
+                  n_bins.at(i),
                   mpi_data_type_W,
                   MPI_SUM,
-                  communicators[i]);
+                  communicators.at(i));
     // displacement array
-    std::vector<int> displs(global_dims[i],0);
+    std::vector<int> displs(this->global_dims.at(i),0);
     int tmp = 0;
-    for (int n = 0; n < global_dims[i]; ++n)
+
+    for (int n = 0; n < this->global_dims.at(i); ++n)
     {
         displs[n] = tmp;
         tmp += histogram_slices.at(n);
     }
 
-
     // gather complete histogram in current dimension
     MPI_Allgatherv(work_dimension.data(),
-                   n_bins->at(i),
+                   n_bins.at(i),
                    mpi_data_type_W, 
                    work_collection.data(), 
                    histogram_slices.data(), 
@@ -417,26 +311,39 @@ template<class T, class W> void ALL_Histogram_LB<T,W>::balance(int step)
 
     int current_slice = 0;
 
+    W work_sum = (W)0;
+
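+    // running prefix sum over the gathered histogram: the boundary of slice k is set
+    // just before the cumulative work would exceed (k+1) * work_avg_dimension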
     // TODO: compute cumulative function - up to work_avg_dimension work in each box
     for (int idx = 0; idx < work_collection.size(); ++idx)
     {
-        work_new_dimension.at(current_slice) += work_collection.at(idx);
+        //work_new_dimension.at(current_slice) += work_collection.at(idx);
+        work_sum += work_collection.at(idx);
         /*
-        if (i == 1 && local_rank == 0)
+        if (i == 1 && this->local_rank == 0)
             std::cout << "DEBUG: " << current_slice << " " << idx << " " << work_new_dimension.at(current_slice) << " " << work_collection.at(idx+1) << 
                 " " << work_collection.size() << " " << work_avg_dimension << " " << work_tot_dimension << " " << work_sum_dimension << std::endl;
         */
         if (idx < work_collection.size() - 1 && 
-            work_new_dimension.at(current_slice) + work_collection.at(idx+1) 
-                > work_avg_dimension
+            //work_new_dimension.at(current_slice) + work_collection.at(idx+1) 
+            //    > work_avg_dimension
+                work_sum + work_collection.at(idx+1) 
+                > (current_slice+1) * work_avg_dimension
             )
         {
-            histogram_slices.at(current_slice) = idx + 1;
-            if (current_slice == (global_dims[i] - 1))
+            /*
+            double d1 = work_avg_dimension -  work_new_dimension.at(current_slice);
+            double d2 = work_new_dimension.at(current_slice) + work_collection.at(idx+1) -
+                            work_avg_dimension;
+            if (d1 >= d2)
+                histogram_slices.at(current_slice) = idx + 1;
+            else
+            */
+                histogram_slices.at(current_slice) = idx;
+            if (current_slice == (this->global_dims.at(i) - 1))
             {
                 histogram_slices.at(current_slice) = work_collection.size();
                 W tmp_work = (W)0;
-                for (int j = 0; j < global_dims[i] - 1; ++j)
+                for (int j = 0; j < this->global_dims.at(i) - 1; ++j)
                     tmp_work += work_new_dimension.at(j);
                 work_new_dimension.at(current_slice) = work_tot_dimension - tmp_work;
                 break;
@@ -446,34 +353,32 @@ template<class T, class W> void ALL_Histogram_LB<T,W>::balance(int step)
     }
 
     // TODO: compute width of domain
-    T up = (local_coords[i] == global_dims[i]-1)?(T)work_collection.size():(T)histogram_slices.at(local_coords[i]);
-    T down = (local_coords[i] == 0)?(T)0:histogram_slices.at(local_coords[i]-1);
+    T up = (this->local_coords.at(i) == this->global_dims.at(i)-1)?(T)work_collection.size():(T)histogram_slices.at(this->local_coords.at(i));
+    T down = (this->local_coords.at(i) == 0)?(T)0:histogram_slices.at(this->local_coords.at(i)-1);
 
     // size of one slice
-    T size = sys_size[2*i+1] / (T)work_collection.size();
+    T size = this->sys_size[2*i+1] / (T)work_collection.size();
 
-    // TODO: change vertices
-    for (int d = 0; d < 6; ++d)
-        shifted_vertices->at(d) = vertices->at(d);
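+    // copy all vertices, then shift only the lower/upper bound of the current dimension i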
+    this->shifted_vertices = this->vertices;
 
-    shifted_vertices->at(i) = (T)down * size;
-    shifted_vertices->at(3+i) = (T)up * size;
+    this->shifted_vertices.at(0).set_coordinate(i, (T)down * size);
+    this->shifted_vertices.at(1).set_coordinate(i, (T)up * size);
 
     // compute min / max work in current dimension
-    MPI_Allreduce(work_new_dimension.data()+local_coords[i], 
+    MPI_Allreduce(work_new_dimension.data()+this->local_coords.at(i), 
                   &work_min_dimension, 
                   1, 
                   mpi_data_type_W, 
                   MPI_MIN, 
-                  global_comm);
-    MPI_Allreduce(work_new_dimension.data()+local_coords[i], 
+                  this->global_comm);
+    MPI_Allreduce(work_new_dimension.data()+this->local_coords.at(i), 
                   &work_max_dimension, 
                   1, 
                   mpi_data_type_W, 
                   MPI_MAX, 
-                  global_comm);
+                  this->global_comm);
 
-    if (local_rank == 0) std::cout << "HISTOGRAM: " 
+    if (this->local_rank == 0) std::cout << "HISTOGRAM: " 
                                    << i << " "
                                    << work_min_dimension << " "
                                    << work_max_dimension << " "
@@ -482,6 +387,15 @@ template<class T, class W> void ALL_Histogram_LB<T,W>::balance(int step)
                                    << std::endl;
 
     find_neighbors();
+
+    MPI_Barrier(this->global_comm);
+    // free created communicators
+    for (int i = 0; i < 6; ++i)
+    {
+        if (communicators.at(i) != MPI_COMM_SELF && communicators.at(i) != MPI_COMM_WORLD)
+            MPI_Comm_free(&(communicators.at(i)));
+    }
+
 }
 
 template<class T, class W> void ALL_Histogram_LB<T,W>::find_neighbors()
@@ -492,7 +406,7 @@ template<class T, class W> void ALL_Histogram_LB<T,W>::find_neighbors()
     MPI_Status sstat, rstat;
     // array to store neighbor vertices in Y/Z direction (reused)
     T* vertices_loc = new T[4];
-    T* vertices_rem = new T[8*global_dims[0]*global_dims[1]];
+    T* vertices_rem = new T[8*this->global_dims.at(0)*this->global_dims.at(1)];
 
     int rem_rank;
     int rem_coords[3];
@@ -505,61 +419,64 @@ template<class T, class W> void ALL_Histogram_LB<T,W>::find_neighbors()
     int offset_coords[3];
 
     // X-neighbors are static
-    n_neighbors->at(0) = n_neighbors->at(1) = 1;
+    n_neighbors.at(0) = n_neighbors.at(1) = 1;
 
     // find X-neighbors
-    MPI_Cart_shift(global_comm,0,1,&rank_left,&rank_right);
+    MPI_Cart_shift(this->global_comm,0,1,&rank_left,&rank_right);
 
     // store X-neighbors 
     neighbors.push_back(rank_left);
     neighbors.push_back(rank_right);
 
     // find Y-neighbors to get border information from
-    MPI_Cart_shift(global_comm,1,1,&rank_left,&rank_right);
+    MPI_Cart_shift(this->global_comm,1,1,&rank_left,&rank_right);
 
     // collect border information from local column
-    vertices_loc[0] = shifted_vertices->at(0);
-    vertices_loc[1] = shifted_vertices->at(dimension);
+    vertices_loc[0] = this->shifted_vertices.at(0).x(0);
+    vertices_loc[1] = this->shifted_vertices.at(1).x(0);
     MPI_Allgather(vertices_loc,
                   2,
                   mpi_data_type_T,
-                  vertices_rem+2*global_dims[0],
+                  vertices_rem+2*this->global_dims.at(0),
                   2,
                   mpi_data_type_T,
-                  communicators[1]);
+                  communicators.at(1));
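+    // vertices_rem: the upper half (offset 2*global_dims[0]) holds the locally gathered
+    // borders, the lower half receives the borders of the neighboring column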
 
     // exchange local column information with upper neighbor in Y direction (cart grid)
     MPI_Irecv(vertices_rem,
-              2*global_dims[0],
+              2*this->global_dims.at(0),
               mpi_data_type_T,
               rank_left, 
               0,
-              global_comm,
+              this->global_comm,
               &rreq);
-    MPI_Isend(vertices_rem+2*global_dims[0],
-              2*global_dims[0],
+    MPI_Isend(vertices_rem+2*this->global_dims.at(0),
+              2*this->global_dims.at(0),
               mpi_data_type_T,
               rank_right,
               0,
-              global_comm,&sreq);
+              this->global_comm,&sreq);
 
     // determine the offset in ranks
     offset_coords[0] = 0;
-    offset_coords[1] = local_coords[1] - 1;
-    offset_coords[2] = local_coords[2];
+    offset_coords[1] = this->local_coords.at(1) - 1;
+    offset_coords[2] = this->local_coords.at(2);
 
     rem_coords[1] = offset_coords[1];
     rem_coords[2] = offset_coords[2];
 
-    MPI_Cart_rank(global_comm,offset_coords,&rank_offset);
+    MPI_Cart_rank(this->global_comm,offset_coords,&rank_offset);
 
+    MPI_Barrier(this->global_comm);
+    if (this->local_rank == 0) std::cout << "HISTOGRAM: neighbor_find y-communication" 
+                                   << std::endl;
     // wait for communication
     MPI_Wait(&sreq,&sstat);
     MPI_Wait(&rreq,&rstat);
 
     // iterate about neighbor borders to determine the neighborship relation
-    n_neighbors->at(2) = 0;
-    for (int x = 0; x < global_dims[0]; ++x)
+    n_neighbors.at(2) = 0;
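+    // a domain in the neighboring column is a neighbor if its x-interval overlaps the local one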
+    for (int x = 0; x < this->global_dims.at(0); ++x)
     {
         if ( 
                 ( vertices_rem[2*x] <= vertices_loc[0] && vertices_loc[0] <  vertices_rem[2*x+1] ) ||
@@ -568,49 +485,52 @@ template<class T, class W> void ALL_Histogram_LB<T,W>::find_neighbors()
                   vertices_loc[1] >= vertices_rem[2*x+1] )
            )
         {
-            n_neighbors->at(2)++;
+            n_neighbors.at(2)++;
             rem_coords[0] = x;
-            MPI_Cart_rank(global_comm,rem_coords,&rem_rank);
+            MPI_Cart_rank(this->global_comm,rem_coords,&rem_rank);
             neighbors.push_back(rem_rank);
         }
     }
 
     // barrier to ensure every process concluded the calculations before overwriting remote borders!
-    MPI_Barrier(global_comm);
+    MPI_Barrier(this->global_comm);
 
     // exchange local column information with lower neighbor in Y direction (cart grid)
     MPI_Irecv(vertices_rem,
-              2*global_dims[0],
+              2*this->global_dims.at(0),
               mpi_data_type_T,
               rank_right, 
               0,
-              global_comm,
+              this->global_comm,
               &rreq);
-    MPI_Isend(vertices_rem+2*global_dims[0],
-              2*global_dims[0],
+    MPI_Isend(vertices_rem+2*this->global_dims.at(0),
+              2*this->global_dims.at(0),
               mpi_data_type_T,
               rank_left,  
               0,
-              global_comm,
+              this->global_comm,
               &sreq);
 
     // determine the offset in ranks
     offset_coords[0] = 0;
-    offset_coords[1] = local_coords[1] + 1;
-    offset_coords[2] = local_coords[2];
+    offset_coords[1] = this->local_coords.at(1) + 1;
+    offset_coords[2] = this->local_coords.at(2);
 
     rem_coords[1] = offset_coords[1];
     rem_coords[2] = offset_coords[2];
 
-    MPI_Cart_rank(global_comm,offset_coords,&rank_offset);
+    MPI_Cart_rank(this->global_comm,offset_coords,&rank_offset);
 
+    MPI_Barrier(this->global_comm);
+    if (this->local_rank == 0) std::cout << "HISTOGRAM: neighbor_find y-communication 2" 
+                                   << std::endl;
     // wait for communication
     MPI_Wait(&sreq,&sstat);
     MPI_Wait(&rreq,&rstat);
 
     // iterate about neighbor borders to determine the neighborship relation
-    n_neighbors->at(3) = 0;
-    for (int x = 0; x < global_dims[0]; ++x)
+    n_neighbors.at(3) = 0;
+    for (int x = 0; x < this->global_dims.at(0); ++x)
     {
         if ( 
                 ( vertices_rem[2*x] <= vertices_loc[0] && vertices_loc[0] <  vertices_rem[2*x+1] ) ||
@@ -619,203 +539,212 @@ template<class T, class W> void ALL_Histogram_LB<T,W>::find_neighbors()
                   vertices_loc[1] >= vertices_rem[2*x+1] )
            )
         {
-            n_neighbors->at(3)++;
+            n_neighbors.at(3)++;
             rem_coords[0] = x;
-            MPI_Cart_rank(global_comm,rem_coords,&rem_rank);
+            MPI_Cart_rank(this->global_comm,rem_coords,&rem_rank);
             neighbors.push_back(rem_rank);
         }
     }
 
     // barrier to ensure every process concluded the calculations before overwriting remote borders!
-    MPI_Barrier(global_comm);
+    MPI_Barrier(this->global_comm);
 
     // find Z-neighbors to get border information from
-    MPI_Cart_shift(global_comm,2,1,&rank_left,&rank_right);
+    MPI_Cart_shift(this->global_comm,2,1,&rank_left,&rank_right);
 
     // collect border information from local column
-    vertices_loc[0] = shifted_vertices->at(0);
-    vertices_loc[1] = shifted_vertices->at(dimension);
-    vertices_loc[2] = shifted_vertices->at(1);
-    vertices_loc[3] = shifted_vertices->at(1+dimension);
+    vertices_loc[0] = this->shifted_vertices.at(0).x(0);
+    vertices_loc[1] = this->shifted_vertices.at(1).x(0);
+    vertices_loc[2] = this->shifted_vertices.at(0).x(1);
+    vertices_loc[3] = this->shifted_vertices.at(1).x(1);
 
-    MPI_Barrier(global_comm);
+    MPI_Barrier(this->global_comm);
 
     MPI_Allgather(vertices_loc,
                   4,
                   mpi_data_type_T,
-                  vertices_rem+4*global_dims[0]*global_dims[1],
+                  vertices_rem+4*this->global_dims.at(0)*this->global_dims.at(1),
                   4,
                   mpi_data_type_T,
-                  communicators[2]);
+                  communicators.at(2));
 
     // exchange local column information with upper neighbor in Z direction (cart grid)
     MPI_Irecv(vertices_rem,
-              4*global_dims[0]*global_dims[1],
+              4*this->global_dims.at(0)*this->global_dims.at(1),
               mpi_data_type_T,
               rank_left, 
               0,
-              global_comm,
+              this->global_comm,
               &rreq);
-    MPI_Isend(vertices_rem+4*global_dims[0]*global_dims[1],
-              4*global_dims[0]*global_dims[1],
+    MPI_Isend(vertices_rem+4*this->global_dims.at(0)*this->global_dims.at(1),
+              4*this->global_dims.at(0)*this->global_dims.at(1),
               mpi_data_type_T,
               rank_right,
               0,
-              global_comm,
+              this->global_comm,
               &sreq);
 
     // determine the offset in ranks
     offset_coords[0] = 0;
     offset_coords[1] = 0;
-    offset_coords[2] = local_coords[2]-1;
+    offset_coords[2] = this->local_coords.at(2)-1;
 
     rem_coords[2] = offset_coords[2];
 
-    MPI_Cart_rank(global_comm,offset_coords,&rank_offset);
+    MPI_Cart_rank(this->global_comm,offset_coords,&rank_offset);
 
+    MPI_Barrier(this->global_comm);
+    if (this->local_rank == 0) std::cout << "HISTOGRAM: neighbor_find z-communication" 
+                                   << std::endl;
     // wait for communication
     MPI_Wait(&sreq,&sstat);
     MPI_Wait(&rreq,&rstat);
 
 
     // iterate about neighbor borders to determine the neighborship relation
-    n_neighbors->at(4) = 0;
-    for (int y = 0; y < global_dims[1]; ++y)
+    n_neighbors.at(4) = 0;
+    for (int y = 0; y < this->global_dims.at(1); ++y)
     {
-        for (int x = 0; x < global_dims[0]; ++x)
+        for (int x = 0; x < this->global_dims.at(0); ++x)
         {
             if (
                    ( 
                         ( 
-                            vertices_rem[4*(x+y*global_dims[0])+2] <= vertices_loc[2] && 
-                            vertices_loc[2] <  vertices_rem[4*(x+y*global_dims[0])+3] 
+                            vertices_rem[4*(x+y*this->global_dims.at(0))+2] <= vertices_loc[2] && 
+                            vertices_loc[2] <  vertices_rem[4*(x+y*this->global_dims.at(0))+3] 
                         ) ||
                         ( 
-                            vertices_rem[4*(x+y*global_dims[0])+2] <  vertices_loc[3] && 
-                            vertices_loc[3] <= vertices_rem[4*(x+y*global_dims[0])+3] 
+                            vertices_rem[4*(x+y*this->global_dims.at(0))+2] <  vertices_loc[3] && 
+                            vertices_loc[3] <= vertices_rem[4*(x+y*this->global_dims.at(0))+3] 
                         ) ||
                         ( 
-                            vertices_rem[4*(x+y*global_dims[0])+2] >= vertices_loc[2] && 
-                            vertices_loc[2] < vertices_rem[4*(x+y*global_dims[0])+3] && 
-                            vertices_loc[3] >= vertices_rem[4*(x+y*global_dims[0])+3] 
+                            vertices_rem[4*(x+y*this->global_dims.at(0))+2] >= vertices_loc[2] && 
+                            vertices_loc[2] < vertices_rem[4*(x+y*this->global_dims.at(0))+3] && 
+                            vertices_loc[3] >= vertices_rem[4*(x+y*this->global_dims.at(0))+3] 
                         )
                    )
                )
             if (
                    ( 
                         ( 
-                            vertices_rem[4*(x+y*global_dims[0])] <= vertices_loc[0] && 
-                            vertices_loc[0] <  vertices_rem[4*(x+y*global_dims[0])+1] 
+                            vertices_rem[4*(x+y*this->global_dims.at(0))] <= vertices_loc[0] && 
+                            vertices_loc[0] <  vertices_rem[4*(x+y*this->global_dims.at(0))+1] 
                         ) ||
                         ( 
-                            vertices_rem[4*(x+y*global_dims[0])] <  vertices_loc[1] && 
-                            vertices_loc[1] <= vertices_rem[4*(x+y*global_dims[0])+1] 
+                            vertices_rem[4*(x+y*this->global_dims.at(0))] <  vertices_loc[1] && 
+                            vertices_loc[1] <= vertices_rem[4*(x+y*this->global_dims.at(0))+1] 
                         ) ||
                         ( 
-                            vertices_rem[4*(x+y*global_dims[0])] >= vertices_loc[0] && 
-                            vertices_loc[0] < vertices_rem[4*(x+y*global_dims[0])+1] && 
-                            vertices_loc[1] >= vertices_rem[4*(x+y*global_dims[0])+1] 
+                            vertices_rem[4*(x+y*this->global_dims.at(0))] >= vertices_loc[0] && 
+                            vertices_loc[0] < vertices_rem[4*(x+y*this->global_dims.at(0))+1] && 
+                            vertices_loc[1] >= vertices_rem[4*(x+y*this->global_dims.at(0))+1] 
                         )
                    )
                )
             {
-                n_neighbors->at(4)++;
+                n_neighbors.at(4)++;
                 rem_coords[1] = y;
                 rem_coords[0] = x;
-                MPI_Cart_rank(global_comm,rem_coords,&rem_rank);
+                MPI_Cart_rank(this->global_comm,rem_coords,&rem_rank);
                 neighbors.push_back(rem_rank);
             }
         }
     }
 
     // barrier to ensure every process concluded the calculations before overwriting remote borders!
-    MPI_Barrier(global_comm);
+    MPI_Barrier(this->global_comm);
 
     // exchange local column information with upper neighbor in Y direction (cart grid)
     MPI_Irecv(vertices_rem,
-              4*global_dims[0]*global_dims[1],
+              4*this->global_dims.at(0)*this->global_dims.at(1),
               mpi_data_type_T,
               rank_right, 
               0,
-              global_comm,
+              this->global_comm,
               &rreq);
-    MPI_Isend(vertices_rem+4*global_dims[0]*global_dims[1],
-            4*global_dims[0]*global_dims[1],
+    MPI_Isend(vertices_rem+4*this->global_dims.at(0)*this->global_dims.at(1),
+            4*this->global_dims.at(0)*this->global_dims.at(1),
             mpi_data_type_T,
             rank_left,
             0,
-            global_comm,
+            this->global_comm,
             &sreq);
 
     // determine the offset in ranks
     offset_coords[0] = 0;
     offset_coords[1] = 0;
-    offset_coords[2] = local_coords[2]+1;
+    offset_coords[2] = this->local_coords.at(2)+1;
 
     rem_coords[2] = offset_coords[2];
 
-    MPI_Cart_rank(global_comm,offset_coords,&rank_offset);
+    MPI_Cart_rank(this->global_comm,offset_coords,&rank_offset);
+
+    MPI_Barrier(this->global_comm);
+    if (this->local_rank == 0) std::cout << "HISTOGRAM: neighbor_find z-communication 2" 
+                                   << std::endl;
 
     // wait for communication
     MPI_Wait(&sreq,&sstat);
     MPI_Wait(&rreq,&rstat);
 
     // iterate about neighbor borders to determine the neighborship relation
-    n_neighbors->at(5) = 0;
-    for (int y = 0; y < global_dims[1]; ++y)
+    n_neighbors.at(5) = 0;
+    for (int y = 0; y < this->global_dims.at(1); ++y)
     {
-        for (int x = 0; x < global_dims[0]; ++x)
+        for (int x = 0; x < this->global_dims.at(0); ++x)
         {
             if (
                    ( 
                         ( 
-                            vertices_rem[4*(x+y*global_dims[0])+2] <= vertices_loc[2] && 
-                            vertices_loc[2] <  vertices_rem[4*(x+y*global_dims[0])+3] 
+                            vertices_rem[4*(x+y*this->global_dims.at(0))+2] <= vertices_loc[2] && 
+                            vertices_loc[2] <  vertices_rem[4*(x+y*this->global_dims.at(0))+3] 
                         ) ||
                         ( 
-                            vertices_rem[4*(x+y*global_dims[0])+2] <  vertices_loc[3] && 
-                            vertices_loc[3] <= vertices_rem[4*(x+y*global_dims[0])+3] 
+                            vertices_rem[4*(x+y*this->global_dims.at(0))+2] <  vertices_loc[3] && 
+                            vertices_loc[3] <= vertices_rem[4*(x+y*this->global_dims.at(0))+3] 
                         ) ||
                         ( 
-                            vertices_rem[4*(x+y*global_dims[0])+2] >= vertices_loc[2] && 
-                            vertices_loc[2] < vertices_rem[4*(x+y*global_dims[0])+3] && 
-                            vertices_loc[3] >= vertices_rem[4*(x+y*global_dims[0])+3] 
+                            vertices_rem[4*(x+y*this->global_dims.at(0))+2] >= vertices_loc[2] && 
+                            vertices_loc[2] < vertices_rem[4*(x+y*this->global_dims.at(0))+3] && 
+                            vertices_loc[3] >= vertices_rem[4*(x+y*this->global_dims.at(0))+3] 
                         )
                    )
                )
             if (
                    ( 
                         ( 
-                            vertices_rem[4*(x+y*global_dims[0])] <= vertices_loc[0] && 
-                            vertices_loc[0] <  vertices_rem[4*(x+y*global_dims[0])+1] 
+                            vertices_rem[4*(x+y*this->global_dims.at(0))] <= vertices_loc[0] && 
+                            vertices_loc[0] <  vertices_rem[4*(x+y*this->global_dims.at(0))+1] 
                         ) ||
                         ( 
-                            vertices_rem[4*(x+y*global_dims[0])] <  vertices_loc[1] && 
-                            vertices_loc[1] <= vertices_rem[4*(x+y*global_dims[0])+1] 
+                            vertices_rem[4*(x+y*this->global_dims.at(0))] <  vertices_loc[1] && 
+                            vertices_loc[1] <= vertices_rem[4*(x+y*this->global_dims.at(0))+1] 
                         ) ||
                         ( 
-                            vertices_rem[4*(x+y*global_dims[0])] >= vertices_loc[0] && 
-                            vertices_loc[0] < vertices_rem[4*(x+y*global_dims[0])+1] && 
-                            vertices_loc[1] >= vertices_rem[4*(x+y*global_dims[0])+1] 
+                            vertices_rem[4*(x+y*this->global_dims.at(0))] >= vertices_loc[0] && 
+                            vertices_loc[0] < vertices_rem[4*(x+y*this->global_dims.at(0))+1] && 
+                            vertices_loc[1] >= vertices_rem[4*(x+y*this->global_dims.at(0))+1] 
                         )
                    )
                )
             {
-                n_neighbors->at(5)++;
+                n_neighbors.at(5)++;
                 rem_coords[1] = y;
                 rem_coords[0] = x;
-                MPI_Cart_rank(global_comm,rem_coords,&rem_rank);
+                MPI_Cart_rank(this->global_comm,rem_coords,&rem_rank);
                 neighbors.push_back(rem_rank);
             }
         }
     }
 
     // barrier to ensure every process concluded the calculations before overwriting remote borders!
-    MPI_Barrier(global_comm);
+    MPI_Barrier(this->global_comm);
 
     // clear up vertices array
-    delete vertices_loc;
-    delete vertices_rem;
+    delete[] vertices_loc;
+    delete[] vertices_rem;
 }
 
 // provide list of neighbors
@@ -826,7 +755,7 @@ template<class T, class W> void ALL_Histogram_LB<T,W>::get_neighbors(std::vector
 // provide list of neighbors in each direction
 template<class T, class W> void ALL_Histogram_LB<T,W>::get_neighbors(int** ret)
 {
-    *ret = n_neighbors->data();
+    *ret = n_neighbors.data();
 }
 
 #endif
diff --git a/include/ALL_LB.hpp b/include/ALL_LB.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..1db74299dfe011c83bfca308962518571062eaaf
--- /dev/null
+++ b/include/ALL_LB.hpp
@@ -0,0 +1,156 @@
+/*
+Copyright 2018 Rene Halver, Forschungszentrum Juelich GmbH, Germany
+Copyright 2018 Godehard Sutmann, Forschungszentrum Juelich GmbH, Germany
+
+Redistribution and use in source and binary forms, with or without modification,
+are permitted provided that the following conditions are met:
+
+1. Redistributions of source code must retain the above copyright notice, this
+   list of conditions and the following disclaimer.
+
+2. Redistributions in binary form must reproduce the above copyright notice, this
+   list of conditions and the following disclaimer in the documentation and/or
+   other materials provided with the distribution.
+
+3. Neither the name of the copyright holder nor the names of its contributors may
+   be used to endorse or promote products derived from this software without
+   specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
+INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA,
+OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+*/
+
+
+#ifndef ALL_LB_HEADER_INCLUDED
+#define ALL_LB_HEADER_INCLUDED
+
+#include <vector>
+#include "ALL_Point.hpp"
+#include "mpi.h"
+
+// T: data type of vertices used in the balancer
+// W: data type of the work used in the balancer
+template <class T, class W> class ALL_LB {
+ public:
+    ALL_LB(int dim, T g) :
+        dimension(dim),
+        gamma(g)
+    {
+        // set allowed minimum size to zero
+        min_size = std::vector<T>(dim,0.0);
+        periodicity.resize(dim);
+        local_coords.resize(dim);
+        global_dims.resize(dim);
+    }
+    virtual ~ALL_LB() = default;
+    virtual void setup() = 0;
+    virtual void balance(int step) = 0;
+    // neighbors
+    // provide list of neighbors
+    virtual void get_neighbors(std::vector<int>&) = 0;
+    // provide list of neighbors in each direction
+    virtual void get_neighbors(int**) = 0;
+    // setting / updating vertices
+    virtual void set_vertices(std::vector<ALL_Point<T>>&);
+
+    // global getter / setter functions
+    virtual void set_dimension(const int d) {dimension = d;}
+    virtual int get_dimension() {return dimension;}
+
+    virtual void set_gamma(const T g) {gamma = g;}
+    virtual T get_gamma() {return gamma;}
+
+    virtual void set_work(const std::vector<W>& w) {work = w;}
+    virtual void set_work(const W w) {work.resize(1); work.at(0) = w;}
+    
+    virtual void set_min_domain_size(const std::vector<T>& _min_size) 
+        {min_size = _min_size;}
+    virtual const std::vector<T>& get_min_domain_size() 
+        {return min_size;}
+
+    virtual void set_sys_size(const std::vector<T>& _sys_size) 
+        {sys_size = _sys_size;}
+    virtual const std::vector<T>& get_sys_size() 
+        {return sys_size;}
+        
+    virtual std::vector<W>& get_work() {return work;}
+    
+    //virtual void set_vertices(std::vector<T>&);
+    virtual std::vector<ALL_Point<T>>& get_vertices();
+    virtual std::vector<ALL_Point<T>>& get_shifted_vertices();
+    
+    virtual void set_communicator(MPI_Comm comm) 
+    {
+        // store communicator
+        global_comm = comm;
+        // get local rank within communicator
+        MPI_Comm_rank(comm,&local_rank);
+    }
+    
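+    // resize the container of shifted vertices (for schemes where their number differs from the input vertices)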
+    void resize_shifted_vertices(int);
+    
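+    // number of vertices currently stored for the local domain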
+    int get_n_vertices() {return vertices.size();}
+    
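+    // set method-specific data (empty overrides in schemes that do not need extra data)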
+    virtual void set_data(void*) = 0;
+    
+protected:
+    // dimension of the system 
+    int dimension;
+    // correction factor / histogram bin size
+    T gamma;
+    // used communicator
+    MPI_Comm global_comm;
+    // local rank
+    int local_rank;
+    // if function supports a minimum domain size, use this
+    std::vector<T> min_size;
+    // system size
+    std::vector<T> sys_size;
+    // work array
+    std::vector<W> work;
+    // vertices
+    std::vector<ALL_Point<T>> vertices;
+    // shifted vertices
+    std::vector<ALL_Point<T>> shifted_vertices;
+    // global dimension of the system in each coordinate
+    std::vector<int> global_dims;
+    // coordinates of the local domain in the system
+    std::vector<int> local_coords;
+    // periodicity of the MPI communicator / system
+    std::vector<int> periodicity;
+};
+
+// set the actual vertices (copies the input vector into the balancer)
+template <class T, class W> void ALL_LB<T,W>::set_vertices(std::vector<ALL_Point<T>>& vertices_in)
+{
+    vertices = vertices_in;
+    // as a basic assumption the number of shifted vertices
+    // is equal to the number of input vertices:
+    // exceptions:
+    //      VORONOI
+    shifted_vertices = vertices;
+}
+
+template <class T, class W> std::vector<ALL_Point<T>>& ALL_LB<T,W>::get_vertices()
+{
+    return vertices;
+}
+
+template <class T, class W> std::vector<ALL_Point<T>>& ALL_LB<T,W>::get_shifted_vertices()
+{
+    return shifted_vertices;
+}
+
+template <class T, class W> void ALL_LB<T,W>::resize_shifted_vertices(int new_size)
+{
+    shifted_vertices.resize(new_size);
+}
+#endif  // ALL_LB_HEADER_INCLUDED
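
Illustration (a minimal sketch, not part of the patch; the class name My_Noop_LB is a placeholder): a derived scheme only has to provide the pure virtual methods of ALL_LB<T,W>; the communicator, vertices, work and minimum domain sizes are inherited from the base class.

    #include <vector>
    #include "ALL_LB.hpp"

    template <class T, class W>
    class My_Noop_LB : public ALL_LB<T,W>
    {
     public:
        My_Noop_LB(int d, W w, T g) : ALL_LB<T,W>(d, g) { this->set_work(w); }

        void setup() override {}                     // no sub-communicators needed
        void balance(int /*step*/) override
        {
            // keep the shifted vertices identical to the input vertices
            this->shifted_vertices = this->vertices;
        }
        void get_neighbors(std::vector<int>& ret) override { ret.clear(); }
        void get_neighbors(int** ret) override { *ret = nullptr; }
        void set_data(void*) override {}
    };
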
diff --git a/include/ALL_Point.hpp b/include/ALL_Point.hpp
index 8f77e71a08fa6152e8b82e6b03056a38f8b3d6e9..bc37b5cfb3e0932a6009a588c7c571f1b51d3734 100644
--- a/include/ALL_Point.hpp
+++ b/include/ALL_Point.hpp
@@ -57,10 +57,16 @@ template <class T> class ALL_Point
         ALL_Point(const std::vector<T>&);
         // constructor for ALL_Point with given coordinates with weight (not secure)
         ALL_Point(const int,const T*, const T);
+        // constructor for ALL_Point with given coordinates, weight and ID (not secure)
+        ALL_Point(const int,const T*, const T, const long);
         // constructor for ALL_Point with given coordinates with weight (std::list)
         ALL_Point(const int,const std::list<T>&, const T);
+        // constructor for ALL_Point with given coordinates, weight and ID (std::list)
+        ALL_Point(const int,const std::list<T>&, const T, const long);
         // constructor for ALL_Point with given coordinates with weight (std::vector)
         ALL_Point(const std::vector<T>&, const T);
+        // constructor for ALL_Point with given coordinates, weight and ID (std::vector)
+        ALL_Point(const std::vector<T>&, const T, const long);
         // destructor
         ~ALL_Point<T>() {};
 
@@ -68,6 +74,8 @@ template <class T> class ALL_Point
         void set_dimension(const int d) {dimension = d; coordinates.resize(d);}
         // set weight of point
         void set_weight(const T w) {weight = w;}
+        // set ID with a long
+        void set_id (const long i) {id = i;}
         // set coordinates with an array (not secure)
         void set_coordinates(const T*);
         // set coordinates with a std::list
@@ -79,6 +87,8 @@ template <class T> class ALL_Point
         T get_coordinate(const int) const;
         // get weight
         T get_weight() const {return weight;}
+        // get ID
+        long get_id() const {return id;}
         // set a single coordinate
         void set_coordinate(const int,const T&);
 
@@ -101,6 +111,8 @@ template <class T> class ALL_Point
 
         // dimension of the ALL_Point
         int dimension;
+        // id
+        long id;
         // array containing the coordinates
         std::vector<T> coordinates;
         // weight (e.g. number of interactions, sites in case of block-based
@@ -136,18 +148,36 @@ template <class T> ALL_Point<T>::ALL_Point(const int d, const T* data, const T w
     weight = w;
 }
 
+template <class T> ALL_Point<T>::ALL_Point(const int d, const T* data, const T w, const long i) 
+    : ALL_Point<T>(d,data,w)
+{
+    id = i;
+}
+
 template <class T> ALL_Point<T>::ALL_Point(const int d, const std::list<T>& data, const T w) 
     : ALL_Point<T>(d,data)
 {
     weight = w;
 }
 
+template <class T> ALL_Point<T>::ALL_Point(const int d, const std::list<T>& data, const T w, const long i) 
+    : ALL_Point<T>(d,data,w)
+{
+    id = i;
+}
+
 template <class T> ALL_Point<T>::ALL_Point(const std::vector<T>& data, const T w)
     : ALL_Point<T>(data)
 {
     weight = w;
 }
 
+template <class T> ALL_Point<T>::ALL_Point(const std::vector<T>& data, const T w, const long i)
+    : ALL_Point<T>(data,w)
+{
+    id = i;
+}
+
 // unsafe version: no bound checking for data possible
 // data needs to be at least of length dimension
 template <class T> void ALL_Point<T>::set_coordinates(const T* data)
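
Illustration (a minimal sketch, not part of the patch; the helper name and values are placeholders): with the new constructors and accessors a point can carry a particle ID alongside its coordinates and weight.

    #include <vector>
    #include "ALL_Point.hpp"

    void make_point_with_id()
    {
        std::vector<double> coords = {0.5, 1.0, 2.5};        // placeholder coordinates
        ALL_Point<double> p(coords, /*weight=*/1.0, /*id=*/42L);

        p.set_weight(2.0);        // weight can still be changed afterwards
        p.set_id(43L);            // and so can the ID
        long id = p.get_id();     // retrieve the stored ID
        (void)id;
    }
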
diff --git a/include/ALL_Staggered.hpp b/include/ALL_Staggered.hpp
index 7f66ef852713d5ab49ff4082f746d81912865ed7..5b159842bf2f5eadcc9853adb9a399656fd8ec0a 100644
--- a/include/ALL_Staggered.hpp
+++ b/include/ALL_Staggered.hpp
@@ -35,181 +35,87 @@ POSSIBILITY OF SUCH DAMAGE.
 #include <exception>
 #include <vector>
 #include "ALL_CustomExceptions.hpp"
+#include "ALL_LB.hpp"
 
 // T: data type of vertices used in the balancer
 // W: data type of the work used in the balancer
-template <class T, class W> class ALL_Staggered_LB
+template <class T, class W> class ALL_Staggered_LB : public ALL_LB<T,W>
 {
     public:
         ALL_Staggered_LB() {}
-        ALL_Staggered_LB(int d, W w, T g) : dimension(d),
-                                         work(w),
-                                         gamma(g)
+        ALL_Staggered_LB(int d, W w, T g) : 
+            ALL_LB<T,W>(d,g)
         {
+            this->set_work(w);
             // need the lower and upper bounds
             // of the domain for the tensor-based
             // load-balancing scheme
-            vertices = std::vector<T>(6);
-            shifted_vertices = std::vector<T>(6);
-            // global dimension of the system in each coordinate
-            global_dims = new int[3];
-            // coordinates of the local domain in the system
-            local_coords = new int[3];
-            // periodicity of the MPI communicator / system
-            periodicity = new int[3];
-
+            
             // array of MPI communicators for each direction (to collect work
             // on each plane)
-            communicators = new MPI_Comm[3];
-            n_neighbors = new std::vector<int>(6);
-
-            // allocate min size array
-            min_size = new T[d];
+            communicators.resize(d);
+            n_neighbors.resize(2*d);
         }
 
-        ~ALL_Staggered_LB();
-
-        void set_vertices(T*);
+        ~ALL_Staggered_LB() = default;
 
         // setup communicators for the exchange of work in each domain
-        void setup(MPI_Comm);
+        void setup() override;
 
         // do a load-balancing step
-        virtual void balance();
+        void balance(int step) override;
 
         // getter for variables (passed by reference to avoid
         // direct access to private members of the object)
 
-        // getter for base vertices
-        void get_vertices(T*);
-        // getter for shifted vertices
-        void get_shifted_vertices(T*);
-        // getter for shifted vertices
-        void get_shifted_vertices(std::vector<T>&);
-
-        // set minimum domain size
-        void set_min_domain_size(T*);
-
         // neighbors
         // provide list of neighbors
-        virtual void get_neighbors(std::vector<int>&);
+        virtual void get_neighbors(std::vector<int>&) override;
         // provide list of neighbors in each direction
-        virtual void get_neighbors(int**);
-
+        virtual void get_neighbors(int**) override;
 
+        // empty override for setting method specific data
+        virtual void set_data(void*) override {}
     private:
-        // dimension of the system
-        int dimension;
-
-        // shift cooeficient
-        T gamma;
-
-        // work per domain
-        W work;
-
-        // vertices
-        std::vector<T> vertices;
-
-        // vertices after load-balancing shift
-        std::vector<T> shifted_vertices;
-
-        // MPI values
-        // global communicator
-        MPI_Comm global_comm;
-        // rank of the local domain in the communicator
-        int local_rank;
-        // global dimension of the system in each coordinate
-        int* global_dims;
-        // coordinates of the local domain in the system
-        int* local_coords;
-        // periodicity of the MPI communicator / system
-        int* periodicity;
-
-        // type for MPI communication
+       // type for MPI communication
         MPI_Datatype mpi_data_type_T;
         MPI_Datatype mpi_data_type_W;
 
         // array of MPI communicators for each direction (to collect work
         // on each plane)
-        MPI_Comm* communicators;
+        std::vector<MPI_Comm>  communicators;
 
         // list of neighbors
         std::vector<int> neighbors;
-        std::vector<int>* n_neighbors;
+        std::vector<int> n_neighbors;
 
         // find neighbors (more sophisticated than for tensor-product
         // variant of LB)
         void find_neighbors();
-
-        // minimum domain size
-        T* min_size;
 };
 
+/*
 template <class T, class W> ALL_Staggered_LB<T,W>::~ALL_Staggered_LB()
 {
-    delete[] global_dims;
-    delete[] local_coords;
-    delete[] periodicity;
-
-    for (int i = 1; i < 3; ++i)
-        MPI_Comm_free(communicators + i);
-    delete[] communicators;
-
-    delete n_neighbors;
-    delete[] min_size;
-}
-
-template <class T, class W> void ALL_Staggered_LB<T,W>::get_vertices(T* result)
-{
-    for (int i = 0; i < 2*dimension; ++i)
-    {
-        result[i] = vertices.at(i);
-    }
-}
-
-template <class T, class W> void ALL_Staggered_LB<T,W>::set_min_domain_size(T* min_size)
-{
-    for (int i = 0; i < dimension; ++i)
-        this->min_size[i] = min_size[i];
-}
-
-template <class T, class W> void ALL_Staggered_LB<T,W>::get_shifted_vertices(T* result)
-{
-    for (int i = 0; i < 2*dimension; ++i)
-    {
-        result[i] = shifted_vertices.at(i);
-    }
-}
-
-template <class T, class W> void ALL_Staggered_LB<T,W>::get_shifted_vertices(std::vector<T>& result)
-{
-    for (int i = 0; i < 2*dimension; ++i)
-    {
-        result.at(i) = shifted_vertices.at(i);
-    }
-}
-
-// set the actual vertices (unsafe due to array copy)
-template <class T, class W> void ALL_Staggered_LB<T,W>::set_vertices(T* v)
-{
-    for (int i = 0; i < 2*dimension; ++i)
-    {
-        vertices.at(i) = v[i];
-    }
+    int dimension = this->get_dimension();
+    for (int i = 1; i < dimension; ++i)
+        MPI_Comm_free(&communicators.at(i));
 }
+*/
 
 // setup routine for the tensor-based load-balancing scheme
 // requires:
-//              global_comm (int): cartesian MPI communicator, from
+//              this->global_comm (int): cartesian MPI communicator, from
 //                                 which separate sub communicators
 //                                 are derived in order to represent
 //                                 each plane of domains in the system
-template <class T, class W> void ALL_Staggered_LB<T,W>::setup(MPI_Comm comm)
+template <class T, class W> void ALL_Staggered_LB<T,W>::setup()
 {
     int status;
+    int dimension = this->get_dimension();
 
     // check if Communicator is cartesian
-    MPI_Topo_test(comm, &status);
+    MPI_Topo_test(this->global_comm, &status);
     if (status != MPI_CART)
     {
         throw ALL_Invalid_Comm_Type_Exception(
@@ -219,27 +125,24 @@ template <class T, class W> void ALL_Staggered_LB<T,W>::setup(MPI_Comm comm)
                 "Cartesian MPI communicator required, passed communicator is not cartesian");
     }
 
-    // store MPI communicator to local variable
-    global_comm = comm;
-
     // get the local coordinates, periodicity and global size from the MPI communicator
-    MPI_Cart_get(global_comm, dimension, global_dims, periodicity, local_coords);
+    MPI_Cart_get(this->global_comm, dimension, this->global_dims.data(), this->periodicity.data(), this->local_coords.data());
 
     // get the local rank from the MPI communicator
-    MPI_Cart_rank(global_comm, local_coords, &local_rank);
+    MPI_Cart_rank(this->global_comm, this->local_coords.data(), &this->local_rank);
 
     // create sub-communicators
 
     // z-plane
-    MPI_Comm_split(global_comm,local_coords[2],local_coords[0]+local_coords[1]*global_dims[0],&communicators[2]);
+    MPI_Comm_split(this->global_comm,this->local_coords.at(2),this->local_coords.at(0)+this->local_coords.at(1)*this->global_dims.at(0),&communicators.at(2));
 
     // y-column
-    MPI_Comm_split(global_comm,local_coords[2] * global_dims[1]
-                              +local_coords[1] ,local_coords[0],
-                              &communicators[1]);
+    MPI_Comm_split(this->global_comm,this->local_coords.at(2) * this->global_dims.at(1)
+                              +this->local_coords.at(1) ,this->local_coords.at(0),
+                              &communicators.at(1));
 
     // only cell itself
-    communicators[0] = MPI_COMM_SELF;
+    communicators.at(0) = MPI_COMM_SELF;
 
     // determine correct MPI data type for template T
     if (std::is_same<T,double>::value) mpi_data_type_T = MPI_DOUBLE;
@@ -275,105 +178,131 @@ template <class T, class W> void ALL_Staggered_LB<T,W>::setup(MPI_Comm comm)
     neighbors.clear();
     for (int i = 0; i < dimension; ++i)
     {
-        MPI_Cart_shift(global_comm,i,1,&rank_left,&rank_right);
+        MPI_Cart_shift(this->global_comm,i,1,&rank_left,&rank_right);
         neighbors.push_back(rank_left);
         neighbors.push_back(rank_right);
     }
 
 }
 
-template<class T, class W> void ALL_Staggered_LB<T,W>::balance()
+template<class T, class W> void ALL_Staggered_LB<T,W>::balance(int)
 {
+    int dimension = this->get_dimension();
+    
     // loop over all available dimensions
     for (int i = 0; i < dimension; ++i)
     {
         W work_local_plane;
+#ifdef ALL_DEBUG_ENABLED
+            MPI_Barrier(this->global_comm);
+            if( this->local_rank == 0 ) std::cout << "ALL_Staggered::balance(): before work computation..." << std::endl;
+#endif
         // collect work from all processes in the same plane
-        MPI_Allreduce(&work,&work_local_plane,1,mpi_data_type_W,MPI_SUM,communicators[i]);
+        MPI_Allreduce(this->work.data(),&work_local_plane,1,mpi_data_type_W,MPI_SUM,communicators[i]);
 
+#ifdef ALL_DEBUG_ENABLED
+            MPI_Barrier(this->global_comm);
+            if( this->local_rank == 0 ) std::cout << "ALL_Staggered::balance(): before work distribution..." << std::endl;
+#endif
         // correct right border:
 
         W remote_work;
-        T size;
+        T local_size;
         T remote_size;
         // determine neighbors
         int rank_left, rank_right;
 
-        MPI_Cart_shift(global_comm,i,1,&rank_left,&rank_right);
+        MPI_Cart_shift(this->global_comm,i,1,&rank_left,&rank_right);
 
         // collect work from right neighbor plane
         MPI_Request sreq, rreq;
         MPI_Status sstat, rstat;
 
-        MPI_Irecv(&remote_work,1,mpi_data_type_W,rank_right,0,global_comm,&rreq);
-        MPI_Isend(&work_local_plane,1,mpi_data_type_W,rank_left,0,global_comm,&sreq);
+        MPI_Irecv(&remote_work,1,mpi_data_type_W,rank_right,0,this->global_comm,&rreq);
+        MPI_Isend(&work_local_plane,1,mpi_data_type_W,rank_left,0,this->global_comm,&sreq);
         MPI_Wait(&sreq,&sstat);
         MPI_Wait(&rreq,&rstat);
 
         // collect size in dimension from right neighbor plane
 
-        size = vertices.at(dimension+i) - vertices.at(i);
+        local_size = this->vertices.at(1).x(i) - this->vertices.at(0).x(i);
 
-        MPI_Irecv(&remote_size,1,mpi_data_type_T,rank_right,0,global_comm,&rreq);
-        MPI_Isend(&size,1,mpi_data_type_T,rank_left,0,global_comm,&sreq);
+        MPI_Irecv(&remote_size,1,mpi_data_type_T,rank_right,0,this->global_comm,&rreq);
+        MPI_Isend(&local_size,1,mpi_data_type_T,rank_left,0,this->global_comm,&sreq);
         MPI_Wait(&sreq,&sstat);
         MPI_Wait(&rreq,&rstat);
 
         // automatic selection of gamma to guarantee stability of method (choice of user gamma ignored)
-        gamma = std::max(4.1, 1.1 * (1.0 + std::max(size,remote_size) / std::min(size,remote_size)));
+        this->gamma = std::max(4.1, 1.1 * (1.0 + std::max(local_size,remote_size) / std::min(local_size,remote_size)));
 
+#ifdef ALL_DEBUG_ENABLED
+            MPI_Barrier(this->global_comm);
+            if( this->local_rank == 0 ) std::cout << "ALL_Staggered::balance(): before shift calculation..." << std::endl;
+#endif
         // calculate shift of borders:
         // s = 1/gamma * 0.5 * (W_r - W_l) / (W_r + W_l) * (d_r + d_l)
         // *_r = * of neighbor (remote)
         // *_l = * of local domain
         T shift = (T)0;
-        if (rank_right != MPI_PROC_NULL && local_coords[i] != global_dims[i]-1 && !( remote_work == (T)0 && work_local_plane == (T)0) )
-            shift = 1.0 / gamma * 0.5 * (remote_work - work_local_plane) / (remote_work + work_local_plane) * (size + remote_size);
+        if (rank_right != MPI_PROC_NULL && this->local_coords[i] != this->global_dims[i]-1 && !( remote_work == (T)0 && work_local_plane == (T)0) )
+            shift = 1.0 / this->get_gamma() * 0.5 * (remote_work - work_local_plane) / (remote_work + work_local_plane) * (local_size + remote_size);
 
         // reimplemented check if move is too large
-        // in theory should be unneeded due to automatic choice of gamma
-        T maxmove = 0.4 * std::min(size,remote_size);
-        maxmove = std::min(0.5*std::min(size-min_size[i],remote_size-min_size[i]),maxmove);
+        
+        // determine a maximum move in order to avoid overlapping borders and to guarantee
+        // a stable shift of the domain borders (avoid the loss of a domain due to too large
+        // shifts of its borders [no crossing of borders])
+        T maxmove;
+        if (shift > 0.0) 
+            maxmove = 0.49*(remote_size-this->min_size[i]);
+        else 
+            maxmove = 0.49*(local_size-this->min_size[i]);
 
         if (std::fabs(shift) > maxmove)
         {
             shift = (shift < 0.0)?-maxmove:maxmove;
         }
 
+#ifdef ALL_DEBUG_ENABLED
+            MPI_Barrier(this->global_comm);
+            if( this->local_rank == 0 ) std::cout << "ALL_Staggered::balance(): before shift distribution..." << std::endl;
+#endif
         // send shift to right neighbors
         T remote_shift = (T)0;
 
-        MPI_Irecv(&remote_shift,1,mpi_data_type_T,rank_left,0,global_comm,&rreq);
-        MPI_Isend(&shift,1,mpi_data_type_T,rank_right,0,global_comm,&sreq);
+        MPI_Irecv(&remote_shift,1,mpi_data_type_T,rank_left,0,this->global_comm,&rreq);
+        MPI_Isend(&shift,1,mpi_data_type_T,rank_right,0,this->global_comm,&sreq);
         MPI_Wait(&sreq,&sstat);
         MPI_Wait(&rreq,&rstat);
 
         // for now: test case for simple program
 
         // if a left neighbor exists: shift left border
-        if (rank_left != MPI_PROC_NULL && local_coords[i] != 0)
-            shifted_vertices.at(i) = vertices.at(i) + remote_shift;
+        if (rank_left != MPI_PROC_NULL && this->local_coords[i] != 0)
+            this->shifted_vertices.at(0).set_coordinate(i, this->vertices.at(0).x(i) + remote_shift);
         else
-            shifted_vertices.at(i) = vertices.at(i);
+            this->shifted_vertices.at(0).set_coordinate(i, this->vertices.at(0).x(i));
 
         // if a right neighbor exists: shift right border
-        if (rank_right != MPI_PROC_NULL && local_coords[i] != global_dims[i]-1)
-            shifted_vertices.at(dimension+i) = vertices.at(dimension+i) + shift;
+        if (rank_right != MPI_PROC_NULL && this->local_coords[i] != this->global_dims[i]-1)
+            this->shifted_vertices.at(1).set_coordinate(i, this->vertices.at(1).x(i) + shift);
         else
-            shifted_vertices.at(dimension+i) = vertices.at(dimension+i);
+            this->shifted_vertices.at(1).set_coordinate(i, this->vertices.at(1).x(i));
 
-        // check if vertices are crossed and throw exception if thing went wrong
-        if (shifted_vertices.at(dimension+i) < shifted_vertices.at(i))
+
+        // check if vertices are crossed and throw exception if something went wrong
+        if (this->shifted_vertices.at(1).x(i) < this->shifted_vertices.at(0).x(i))
         {
-            std::cout << "ERROR on process: " << local_rank << " "
-                                              << vertices.at(i) << " "
-                                              << vertices.at(dimension+i) << " "
-                                              << shifted_vertices.at(i) << " "
-                                              << shifted_vertices.at(dimension+i) << " "
+            std::cout << "ERROR on process: " << this->local_rank << " "
+                                              << this->vertices.at(0).x(i) << " "
+                                              << this->vertices.at(1).x(i) << " "
+                                              << this->shifted_vertices.at(0).x(i) << " "
+                                              << this->shifted_vertices.at(1).x(i) << " "
                                               << remote_shift << " "
                                               << shift << " "
                                               << remote_size << " "
-                                              << size << " "
+                                              << local_size << " "
                                               << std::fabs(maxmove) << " "
                                               << maxmove << " "
                                               << std::endl;
@@ -384,19 +313,28 @@ template<class T, class W> void ALL_Staggered_LB<T,W>::balance()
                     "Lower border of process larger than upper border of process!");
         }
 
+#ifdef ALL_DEBUG_ENABLED
+            MPI_Barrier(this->global_comm);
+            if( this->local_rank == 0 ) std::cout << "ALL_Staggered::balance(): before neighbor search..." << std::endl;
+#endif
         find_neighbors();
     }
 }
 
 template<class T, class W> void ALL_Staggered_LB<T,W>::find_neighbors()
 {
+    int dimension = this->get_dimension();
+    auto& work = this->get_work();
+    auto& vertices = this->get_vertices();
+    auto& shifted_vertices = this->get_shifted_vertices();
+    
     neighbors.clear();
     // collect work from right neighbor plane
     MPI_Request sreq, rreq;
     MPI_Status sstat, rstat;
     // array to store neighbor vertices in Y/Z direction (reused)
     T* vertices_loc = new T[4];
-    T* vertices_rem = new T[8*global_dims[0]*global_dims[1]];
+    T* vertices_rem = new T[8*this->global_dims[0]*this->global_dims[1]];
 
     int rem_rank;
     int rem_coords[3];
@@ -409,44 +347,44 @@ template<class T, class W> void ALL_Staggered_LB<T,W>::find_neighbors()
     int offset_coords[3];
 
     // X-neighbors are static
-    n_neighbors->at(0) = n_neighbors->at(1) = 1;
+    n_neighbors.at(0) = n_neighbors.at(1) = 1;
 
     // find X-neighbors
-    MPI_Cart_shift(global_comm,0,1,&rank_left,&rank_right);
+    MPI_Cart_shift(this->global_comm,0,1,&rank_left,&rank_right);
 
     // store X-neighbors
     neighbors.push_back(rank_left);
     neighbors.push_back(rank_right);
 
     // find Y-neighbors to get border information from
-    MPI_Cart_shift(global_comm,1,1,&rank_left,&rank_right);
+    MPI_Cart_shift(this->global_comm,1,1,&rank_left,&rank_right);
 
     // collect border information from local column
-    vertices_loc[0] = shifted_vertices.at(0);
-    vertices_loc[1] = shifted_vertices.at(dimension);
-    MPI_Allgather(vertices_loc,2,mpi_data_type_T,vertices_rem+2*global_dims[0],2,mpi_data_type_T,communicators[1]);
+    vertices_loc[0] = shifted_vertices.at(0).x(0);
+    vertices_loc[1] = shifted_vertices.at(1).x(0);
+    MPI_Allgather(vertices_loc,2,mpi_data_type_T,vertices_rem+2*this->global_dims[0],2,mpi_data_type_T,communicators[1]);
 
     // exchange local column information with upper neighbor in Y direction (cart grid)
-    MPI_Irecv(vertices_rem                 ,2*global_dims[0],mpi_data_type_T,rank_left, 0,global_comm,&rreq);
-    MPI_Isend(vertices_rem+2*global_dims[0],2*global_dims[0],mpi_data_type_T,rank_right,0,global_comm,&sreq);
+    MPI_Irecv(vertices_rem                 ,2*this->global_dims[0],mpi_data_type_T,rank_left, 0,this->global_comm,&rreq);
+    MPI_Isend(vertices_rem+2*this->global_dims[0],2*this->global_dims[0],mpi_data_type_T,rank_right,0,this->global_comm,&sreq);
 
     // determine the offset in ranks
     offset_coords[0] = 0;
-    offset_coords[1] = local_coords[1] - 1;
-    offset_coords[2] = local_coords[2];
+    offset_coords[1] = this->local_coords[1] - 1;
+    offset_coords[2] = this->local_coords[2];
 
     rem_coords[1] = offset_coords[1];
     rem_coords[2] = offset_coords[2];
 
-    MPI_Cart_rank(global_comm,offset_coords,&rank_offset);
+    MPI_Cart_rank(this->global_comm,offset_coords,&rank_offset);
 
     // wait for communication
     MPI_Wait(&sreq,&sstat);
     MPI_Wait(&rreq,&rstat);
 
     // iterate about neighbor borders to determine the neighborship relation
-    n_neighbors->at(2) = 0;
-    for (int x = 0; x < global_dims[0]; ++x)
+    n_neighbors.at(2) = 0;
+    for (int x = 0; x < this->global_dims[0]; ++x)
     {
         if (
                 ( vertices_rem[2*x] <= vertices_loc[0] && vertices_loc[0] <  vertices_rem[2*x+1] ) ||
@@ -455,37 +393,37 @@ template<class T, class W> void ALL_Staggered_LB<T,W>::find_neighbors()
                   vertices_loc[1] >= vertices_rem[2*x+1] )
            )
         {
-            n_neighbors->at(2)++;
+            n_neighbors.at(2)++;
             rem_coords[0] = x;
-            MPI_Cart_rank(global_comm,rem_coords,&rem_rank);
+            MPI_Cart_rank(this->global_comm,rem_coords,&rem_rank);
             neighbors.push_back(rem_rank);
         }
     }
 
     // barrier to ensure every process concluded the calculations before overwriting remote borders!
-    MPI_Barrier(global_comm);
+    MPI_Barrier(this->global_comm);
 
     // exchange local column information with lower neighbor in Y direction (cart grid)
-    MPI_Irecv(vertices_rem                 ,2*global_dims[0],mpi_data_type_T,rank_right, 0,global_comm,&rreq);
-    MPI_Isend(vertices_rem+2*global_dims[0],2*global_dims[0],mpi_data_type_T,rank_left,  0,global_comm,&sreq);
+    MPI_Irecv(vertices_rem                 ,2*this->global_dims[0],mpi_data_type_T,rank_right, 0,this->global_comm,&rreq);
+    MPI_Isend(vertices_rem+2*this->global_dims[0],2*this->global_dims[0],mpi_data_type_T,rank_left,  0,this->global_comm,&sreq);
 
     // determine the offset in ranks
     offset_coords[0] = 0;
-    offset_coords[1] = local_coords[1] + 1;
-    offset_coords[2] = local_coords[2];
+    offset_coords[1] = this->local_coords[1] + 1;
+    offset_coords[2] = this->local_coords[2];
 
     rem_coords[1] = offset_coords[1];
     rem_coords[2] = offset_coords[2];
 
-    MPI_Cart_rank(global_comm,offset_coords,&rank_offset);
+    MPI_Cart_rank(this->global_comm,offset_coords,&rank_offset);
 
     // wait for communication
     MPI_Wait(&sreq,&sstat);
     MPI_Wait(&rreq,&rstat);
 
     // iterate about neighbor borders to determine the neighborship relation
-    n_neighbors->at(3) = 0;
-    for (int x = 0; x < global_dims[0]; ++x)
+    n_neighbors.at(3) = 0;
+    for (int x = 0; x < this->global_dims[0]; ++x)
     {
         if (
                 ( vertices_rem[2*x] <= vertices_loc[0] && vertices_loc[0] <  vertices_rem[2*x+1] ) ||
@@ -494,41 +432,41 @@ template<class T, class W> void ALL_Staggered_LB<T,W>::find_neighbors()
                   vertices_loc[1] >= vertices_rem[2*x+1] )
            )
         {
-            n_neighbors->at(3)++;
+            n_neighbors.at(3)++;
             rem_coords[0] = x;
-            MPI_Cart_rank(global_comm,rem_coords,&rem_rank);
+            MPI_Cart_rank(this->global_comm,rem_coords,&rem_rank);
             neighbors.push_back(rem_rank);
         }
     }
 
     // barrier to ensure every process concluded the calculations before overwriting remote borders!
-    MPI_Barrier(global_comm);
+    MPI_Barrier(this->global_comm);
 
     // find Z-neighbors to get border information from
-    MPI_Cart_shift(global_comm,2,1,&rank_left,&rank_right);
+    MPI_Cart_shift(this->global_comm,2,1,&rank_left,&rank_right);
 
     // collect border information from local column
-    vertices_loc[0] = shifted_vertices.at(0);
-    vertices_loc[1] = shifted_vertices.at(dimension);
-    vertices_loc[2] = shifted_vertices.at(1);
-    vertices_loc[3] = shifted_vertices.at(1+dimension);
+    vertices_loc[0] = shifted_vertices.at(0).x(0);
+    vertices_loc[1] = shifted_vertices.at(1).x(0);
+    vertices_loc[2] = shifted_vertices.at(0).x(1);
+    vertices_loc[3] = shifted_vertices.at(1).x(1);
 
-    MPI_Barrier(global_comm);
+    MPI_Barrier(this->global_comm);
 
-    MPI_Allgather(vertices_loc,4,mpi_data_type_T,vertices_rem+4*global_dims[0]*global_dims[1],4,mpi_data_type_T,communicators[2]);
+    MPI_Allgather(vertices_loc,4,mpi_data_type_T,vertices_rem+4*this->global_dims[0]*this->global_dims[1],4,mpi_data_type_T,communicators[2]);
 
     // exchange local column information with upper neighbor in Z direction (cart grid)
-    MPI_Irecv(vertices_rem                                ,4*global_dims[0]*global_dims[1],mpi_data_type_T,rank_left, 0,global_comm,&rreq);
-    MPI_Isend(vertices_rem+4*global_dims[0]*global_dims[1],4*global_dims[0]*global_dims[1],mpi_data_type_T,rank_right,0,global_comm,&sreq);
+    MPI_Irecv(vertices_rem                                ,4*this->global_dims[0]*this->global_dims[1],mpi_data_type_T,rank_left, 0,this->global_comm,&rreq);
+    MPI_Isend(vertices_rem+4*this->global_dims[0]*this->global_dims[1],4*this->global_dims[0]*this->global_dims[1],mpi_data_type_T,rank_right,0,this->global_comm,&sreq);
 
     // determine the offset in ranks
     offset_coords[0] = 0;
     offset_coords[1] = 0;
-    offset_coords[2] = local_coords[2]-1;
+    offset_coords[2] = this->local_coords[2]-1;
 
     rem_coords[2] = offset_coords[2];
 
-    MPI_Cart_rank(global_comm,offset_coords,&rank_offset);
+    MPI_Cart_rank(this->global_comm,offset_coords,&rank_offset);
 
     // wait for communication
     MPI_Wait(&sreq,&sstat);
@@ -536,91 +474,91 @@ template<class T, class W> void ALL_Staggered_LB<T,W>::find_neighbors()
 
 
     // iterate about neighbor borders to determine the neighborship relation
-    n_neighbors->at(4) = 0;
-    for (int y = 0; y < global_dims[1]; ++y)
+    n_neighbors.at(4) = 0;
+    for (int y = 0; y < this->global_dims[1]; ++y)
     {
-        for (int x = 0; x < global_dims[0]; ++x)
+        for (int x = 0; x < this->global_dims[0]; ++x)
         {
             if (
                    (
-                        ( vertices_rem[4*(x+y*global_dims[0])+2] <= vertices_loc[2] && vertices_loc[2] <  vertices_rem[4*(x+y*global_dims[0])+3] ) ||
-                        ( vertices_rem[4*(x+y*global_dims[0])+2] <  vertices_loc[3] && vertices_loc[3] <= vertices_rem[4*(x+y*global_dims[0])+3] ) ||
-                        ( vertices_rem[4*(x+y*global_dims[0])+2] >= vertices_loc[2] && vertices_loc[2] < vertices_rem[4*(x+y*global_dims[0])+3] &&
-                          vertices_loc[3] >= vertices_rem[4*(x+y*global_dims[0])+3] )
+                        ( vertices_rem[4*(x+y*this->global_dims[0])+2] <= vertices_loc[2] && vertices_loc[2] <  vertices_rem[4*(x+y*this->global_dims[0])+3] ) ||
+                        ( vertices_rem[4*(x+y*this->global_dims[0])+2] <  vertices_loc[3] && vertices_loc[3] <= vertices_rem[4*(x+y*this->global_dims[0])+3] ) ||
+                        ( vertices_rem[4*(x+y*this->global_dims[0])+2] >= vertices_loc[2] && vertices_loc[2] < vertices_rem[4*(x+y*this->global_dims[0])+3] &&
+                          vertices_loc[3] >= vertices_rem[4*(x+y*this->global_dims[0])+3] )
                    )
                )
             if (
                    (
-                        ( vertices_rem[4*(x+y*global_dims[0])] <= vertices_loc[0] && vertices_loc[0] <  vertices_rem[4*(x+y*global_dims[0])+1] ) ||
-                        ( vertices_rem[4*(x+y*global_dims[0])] <  vertices_loc[1] && vertices_loc[1] <= vertices_rem[4*(x+y*global_dims[0])+1] ) ||
-                        ( vertices_rem[4*(x+y*global_dims[0])] >= vertices_loc[0] && vertices_loc[0] < vertices_rem[4*(x+y*global_dims[0])+1] &&
-                          vertices_loc[1] >= vertices_rem[4*(x+y*global_dims[0])+1] )
+                        ( vertices_rem[4*(x+y*this->global_dims[0])] <= vertices_loc[0] && vertices_loc[0] <  vertices_rem[4*(x+y*this->global_dims[0])+1] ) ||
+                        ( vertices_rem[4*(x+y*this->global_dims[0])] <  vertices_loc[1] && vertices_loc[1] <= vertices_rem[4*(x+y*this->global_dims[0])+1] ) ||
+                        ( vertices_rem[4*(x+y*this->global_dims[0])] >= vertices_loc[0] && vertices_loc[0] < vertices_rem[4*(x+y*this->global_dims[0])+1] &&
+                          vertices_loc[1] >= vertices_rem[4*(x+y*this->global_dims[0])+1] )
                    )
                )
             {
-                n_neighbors->at(4)++;
+                n_neighbors.at(4)++;
                 rem_coords[1] = y;
                 rem_coords[0] = x;
-                MPI_Cart_rank(global_comm,rem_coords,&rem_rank);
+                MPI_Cart_rank(this->global_comm,rem_coords,&rem_rank);
                 neighbors.push_back(rem_rank);
             }
         }
     }
 
     // barrier to ensure every process concluded the calculations before overwriting remote borders!
-    MPI_Barrier(global_comm);
+    MPI_Barrier(this->global_comm);
 
     // exchange local column information with upper neighbor in Y direction (cart grid)
-    MPI_Irecv(vertices_rem                                ,4*global_dims[0]*global_dims[1],mpi_data_type_T,rank_right, 0,global_comm,&rreq);
-    MPI_Isend(vertices_rem+4*global_dims[0]*global_dims[1],4*global_dims[0]*global_dims[1],mpi_data_type_T,rank_left,0,global_comm,&sreq);
+    MPI_Irecv(vertices_rem                                ,4*this->global_dims[0]*this->global_dims[1],mpi_data_type_T,rank_right, 0,this->global_comm,&rreq);
+    MPI_Isend(vertices_rem+4*this->global_dims[0]*this->global_dims[1],4*this->global_dims[0]*this->global_dims[1],mpi_data_type_T,rank_left,0,this->global_comm,&sreq);
 
     // determine the offset in ranks
     offset_coords[0] = 0;
     offset_coords[1] = 0;
-    offset_coords[2] = local_coords[2]+1;
+    offset_coords[2] = this->local_coords[2]+1;
 
     rem_coords[2] = offset_coords[2];
 
-    MPI_Cart_rank(global_comm,offset_coords,&rank_offset);
+    MPI_Cart_rank(this->global_comm,offset_coords,&rank_offset);
 
     // wait for communication
     MPI_Wait(&sreq,&sstat);
     MPI_Wait(&rreq,&rstat);
 
     // iterate about neighbor borders to determine the neighborship relation
-    n_neighbors->at(5) = 0;
-    for (int y = 0; y < global_dims[1]; ++y)
+    n_neighbors.at(5) = 0;
+    for (int y = 0; y < this->global_dims[1]; ++y)
     {
-        for (int x = 0; x < global_dims[0]; ++x)
+        for (int x = 0; x < this->global_dims[0]; ++x)
         {
             if (
                    (
-                        ( vertices_rem[4*(x+y*global_dims[0])+2] <= vertices_loc[2] && vertices_loc[2] <  vertices_rem[4*(x+y*global_dims[0])+3] ) ||
-                        ( vertices_rem[4*(x+y*global_dims[0])+2] <  vertices_loc[3] && vertices_loc[3] <= vertices_rem[4*(x+y*global_dims[0])+3] ) ||
-                        ( vertices_rem[4*(x+y*global_dims[0])+2] >= vertices_loc[2] && vertices_loc[2] < vertices_rem[4*(x+y*global_dims[0])+3] &&
-                          vertices_loc[3] >= vertices_rem[4*(x+y*global_dims[0])+3] )
+                        ( vertices_rem[4*(x+y*this->global_dims[0])+2] <= vertices_loc[2] && vertices_loc[2] <  vertices_rem[4*(x+y*this->global_dims[0])+3] ) ||
+                        ( vertices_rem[4*(x+y*this->global_dims[0])+2] <  vertices_loc[3] && vertices_loc[3] <= vertices_rem[4*(x+y*this->global_dims[0])+3] ) ||
+                        ( vertices_rem[4*(x+y*this->global_dims[0])+2] >= vertices_loc[2] && vertices_loc[2] < vertices_rem[4*(x+y*this->global_dims[0])+3] &&
+                          vertices_loc[3] >= vertices_rem[4*(x+y*this->global_dims[0])+3] )
                    )
                )
             if (
                    (
-                        ( vertices_rem[4*(x+y*global_dims[0])] <= vertices_loc[0] && vertices_loc[0] <  vertices_rem[4*(x+y*global_dims[0])+1] ) ||
-                        ( vertices_rem[4*(x+y*global_dims[0])] <  vertices_loc[1] && vertices_loc[1] <= vertices_rem[4*(x+y*global_dims[0])+1] ) ||
-                        ( vertices_rem[4*(x+y*global_dims[0])] >= vertices_loc[0] && vertices_loc[0] < vertices_rem[4*(x+y*global_dims[0])+1] &&
-                          vertices_loc[1] >= vertices_rem[4*(x+y*global_dims[0])+1] )
+                        ( vertices_rem[4*(x+y*this->global_dims[0])] <= vertices_loc[0] && vertices_loc[0] <  vertices_rem[4*(x+y*this->global_dims[0])+1] ) ||
+                        ( vertices_rem[4*(x+y*this->global_dims[0])] <  vertices_loc[1] && vertices_loc[1] <= vertices_rem[4*(x+y*this->global_dims[0])+1] ) ||
+                        ( vertices_rem[4*(x+y*this->global_dims[0])] >= vertices_loc[0] && vertices_loc[0] < vertices_rem[4*(x+y*this->global_dims[0])+1] &&
+                          vertices_loc[1] >= vertices_rem[4*(x+y*this->global_dims[0])+1] )
                    )
                )
             {
-                n_neighbors->at(5)++;
+                n_neighbors.at(5)++;
                 rem_coords[1] = y;
                 rem_coords[0] = x;
-                MPI_Cart_rank(global_comm,rem_coords,&rem_rank);
+                MPI_Cart_rank(this->global_comm,rem_coords,&rem_rank);
                 neighbors.push_back(rem_rank);
             }
         }
     }
 
     // barrier to ensure every process concluded the calculations before overwriting remote borders!
-    MPI_Barrier(global_comm);
+    MPI_Barrier(this->global_comm);
 
     // clear up vertices array
     delete[] vertices_loc;
@@ -635,7 +573,7 @@ template<class T, class W> void ALL_Staggered_LB<T,W>::get_neighbors(std::vector
 // provide list of neighbors in each direction
 template<class T, class W> void ALL_Staggered_LB<T,W>::get_neighbors(int** ret)
 {
-    *ret = n_neighbors->data();
+    *ret = n_neighbors.data();
 }
 
 #endif
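
Illustration (a standalone sketch of the shift computation used in ALL_Staggered_LB::balance() above; the function name is a placeholder and plain double is used instead of the template parameters): gamma is chosen automatically from the two domain extents, the proposed shift follows s = 1/gamma * 0.5 * (W_r - W_l) / (W_r + W_l) * (d_l + d_r), and the result is clamped so that neither domain can shrink below its minimum size.

    #include <algorithm>
    #include <cmath>

    double staggered_shift(double work_local, double work_remote,
                           double size_local, double size_remote,
                           double min_size)
    {
        // no work on either side: keep the border where it is
        if (work_local == 0.0 && work_remote == 0.0) return 0.0;

        // automatic choice of gamma to keep the method stable
        double gamma = std::max(4.1, 1.1 * (1.0 + std::max(size_local, size_remote)
                                                / std::min(size_local, size_remote)));

        // proposed shift of the shared border
        double shift = 1.0 / gamma * 0.5 * (work_remote - work_local)
                       / (work_remote + work_local) * (size_local + size_remote);

        // limit the move so that neither domain drops below its minimum size
        double maxmove = (shift > 0.0) ? 0.49 * (size_remote - min_size)
                                       : 0.49 * (size_local - min_size);
        if (std::fabs(shift) > maxmove)
            shift = (shift < 0.0) ? -maxmove : maxmove;

        return shift;
    }
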
diff --git a/include/ALL_Tensor.hpp b/include/ALL_Tensor.hpp
index d8f333f2eae03465b9529917da57bd55c95060d7..192de8ebc643b5d15c1a5b7aaabc871917760ae0 100644
--- a/include/ALL_Tensor.hpp
+++ b/include/ALL_Tensor.hpp
@@ -34,159 +34,73 @@ POSSIBILITY OF SUCH DAMAGE.
 #include <mpi.h>
 #include <exception>
 #include "ALL_CustomExceptions.hpp"
+#include "ALL_LB.hpp"
 
 // T: data type of vertices used in the balancer
 // W: data type of the work used in the balancer
-template <class T, class W> class ALL_Tensor_LB
+template <class T, class W> class ALL_Tensor_LB : public ALL_LB<T,W>
 {
     public:
         ALL_Tensor_LB() {}
-        ALL_Tensor_LB(int d, W w, T g) : dimension(d),
-                                         work(w), 
-                                         gamma(g)
+        ALL_Tensor_LB(int d, W w, T g) : 
+            ALL_LB<T,W>(d, g)
         {
+            this->set_work(w);
             // need the lower and upper bounds
             // of the domain for the tensor-based
             // load-balancing scheme
-            vertices = new T[2*dimension];
-            shifted_vertices = new T[2*dimension];
-            // global dimension of the system in each coordinate
-            global_dims = new int[dimension];
-            // coordinates of the local domain in the system
-            local_coords = new int[dimension];
-            // periodicity of the MPI communicator / system
-            periodicity = new int[dimension];
 
             // array of MPI communicators for each direction (to collect work
             // on each plane)
-            communicators = new MPI_Comm[dimension];
-            n_neighbors = new int[2*dimension];
+            communicators.resize(d);
+            n_neighbors.resize(2*d);
         }
 
-        ~ALL_Tensor_LB(); 
-
-        void set_vertices(T*);
+        ~ALL_Tensor_LB() = default;
 
         // setup communicators for the exchange of work in each domain
-        void setup(MPI_Comm);
+        void setup() override;
 
         // do a load-balancing step
-        virtual void balance();
+        virtual void balance(int step) override;
 
         // getter for variables (passed by reference to avoid
         // direct access to private members of the object)
 
-        // getter for base vertices
-        void get_vertices(T*);
-        // getter for shifted vertices
-        void get_shifted_vertices(T*);
-        // getter for shifted vertices
-        void get_shifted_vertices(std::vector<T>&);
-
         // neighbors
         // provide list of neighbors
-        virtual void get_neighbors(std::vector<int>&);
+        void get_neighbors(std::vector<int>&) override;
         // provide list of neighbors in each direction
-        virtual void get_neighbors(int**);
-
+        void get_neighbors(int**) override;
 
+        // empty override for setting method specific data
+        virtual void set_data(void*) override {}
     private:
-        // dimension of the system
-        int dimension;
-
-        // shift cooeficient
-        T gamma;
-
-        // work per domain
-        W work;
-
-        // vertices
-        T* vertices; 
-
-        // vertices after load-balancing shift
-        T* shifted_vertices; 
-
-        // MPI values
-        // global communicator
-        MPI_Comm global_comm;
-        // rank of the local domain in the communicator
-        int local_rank;
-        // global dimension of the system in each coordinate
-        int* global_dims;
-        // coordinates of the local domain in the system
-        int* local_coords;
-        // periodicity of the MPI communicator / system
-        int* periodicity;
-        
         // type for MPI communication
         MPI_Datatype mpi_data_type_T;
         MPI_Datatype mpi_data_type_W;
 
         // array of MPI communicators for each direction (to collect work
         // on each plane)
-        MPI_Comm* communicators;
+        std::vector<MPI_Comm> communicators;
 
         // list of neighbors
         std::vector<int> neighbors;
-        int* n_neighbors;
+        std::vector<int> n_neighbors;
 };
 
-template <class T, class W> ALL_Tensor_LB<T,W>::~ALL_Tensor_LB()
-{
-    if (vertices) delete vertices;
-    if (shifted_vertices) delete shifted_vertices;
-    if (global_dims) delete global_dims;
-    if (local_coords) delete local_coords;
-    if (periodicity) delete periodicity;
-    if (communicators) delete communicators;
-    if (n_neighbors) delete n_neighbors;
-}
-
-template <class T, class W> void ALL_Tensor_LB<T,W>::get_vertices(T* result)
-{
-    for (int i = 0; i < 2*dimension; ++i)
-    {
-        result[i] = vertices[i];
-    }
-}
-
-template <class T, class W> void ALL_Tensor_LB<T,W>::get_shifted_vertices(T* result)
-{
-    for (int i = 0; i < 2*dimension; ++i)
-    {
-        result[i] = shifted_vertices[i];
-    }
-}
-
-template <class T, class W> void ALL_Tensor_LB<T,W>::get_shifted_vertices(std::vector<T>& result)
-{
-    for (int i = 0; i < 2*dimension; ++i)
-    {
-        result.at(i) = shifted_vertices[i];
-    }
-}
-
-// set the actual vertices (unsafe due to array copy)
-template <class T, class W> void ALL_Tensor_LB<T,W>::set_vertices(T* v)
-{
-    for (int i = 0; i < 2*dimension; ++i)
-    {
-        vertices[i] = v[i];
-    }
-}
-
 // setup routine for the tensor-based load-balancing scheme
 // requires: 
-//              global_comm (int): cartesian MPI communicator, from
+//              this->global_comm (int): cartesian MPI communicator, from
 //                                 which separate sub communicators
 //                                 are derived in order to represent
 //                                 each plane of domains in the system
-template <class T, class W> void ALL_Tensor_LB<T,W>::setup(MPI_Comm comm)
+template <class T, class W> void ALL_Tensor_LB<T,W>::setup()
 {
     int status;
 
     // check if Communicator is cartesian
-    MPI_Topo_test(comm, &status);
+    MPI_Topo_test(this->global_comm, &status);
     if (status != MPI_CART)
     {
         throw ALL_Invalid_Comm_Type_Exception(
@@ -195,25 +109,19 @@ template <class T, class W> void ALL_Tensor_LB<T,W>::setup(MPI_Comm comm)
                 __LINE__,
                 "Cartesian MPI communicator required, passed communicator not cartesian");
     }
-    
-    // allocate arrays to correct sizes
-    global_dims = new int[dimension];
-    local_coords = new int[dimension];
-    periodicity = new int[dimension];
-
-    // store MPI communicator to local variable
-    global_comm = comm;
 
+    int dim = this->get_dimension();
+    
     // get the local coordinates, periodicity and global size from the MPI communicator
-    MPI_Cart_get(global_comm, dimension, global_dims, periodicity, local_coords);
+    MPI_Cart_get(this->global_comm, dim, this->global_dims.data(), this->periodicity.data(), this->local_coords.data());
 
     // get the local rank from the MPI communicator
-    MPI_Cart_rank(global_comm, local_coords, &local_rank);
+    MPI_Cart_rank(this->global_comm, this->local_coords.data(), &this->local_rank);
 
     // create sub-communicators 
-    for (int i = 0; i < dimension; ++i)
+    for (int i = 0; i < dim; ++i)
     {
-        MPI_Comm_split(global_comm,local_coords[i],0,&communicators[i]);
+        MPI_Comm_split(this->global_comm,this->local_coords.at(i),0,&communicators[i]);
     }
 
     // determine correct MPI data type for template T
@@ -232,9 +140,9 @@ template <class T, class W> void ALL_Tensor_LB<T,W>::setup(MPI_Comm comm)
     int rank_left, rank_right;
 
     neighbors.clear();
-    for (int i = 0; i < dimension; ++i)
+    for (int i = 0; i < dim; ++i)
     {
-        MPI_Cart_shift(global_comm,i,1,&rank_left,&rank_right);
+        MPI_Cart_shift(this->global_comm,i,1,&rank_left,&rank_right);
         neighbors.push_back(rank_left);
         neighbors.push_back(rank_right);
         n_neighbors[2*i] = 1;
@@ -243,40 +151,44 @@ template <class T, class W> void ALL_Tensor_LB<T,W>::setup(MPI_Comm comm)
     
 }
 
-template<class T, class W> void ALL_Tensor_LB<T,W>::balance()
+template<class T, class W> void ALL_Tensor_LB<T,W>::balance(int)
 {
+    int dim = this->get_dimension();
+    std::vector<ALL_Point<T>>& vertices = this->get_vertices();
+    std::vector<ALL_Point<T>>& shifted_vertices = this->get_shifted_vertices();
+    
     // loop over all available dimensions
-    for (int i = 0; i < dimension; ++i)
+    for (int i = 0; i < dim; ++i)
     {
         W work_local_plane;
         // collect work from all processes in the same plane
-        MPI_Allreduce(&work,&work_local_plane,1,mpi_data_type_W,MPI_SUM,communicators[i]);
+        MPI_Allreduce(this->get_work().data(),&work_local_plane,1,mpi_data_type_W,MPI_SUM,communicators.at(i));
         
         // correct right border:
 
         W remote_work;
-        T size;
+        T local_size;
         T remote_size;
         // determine neighbors
         int rank_left, rank_right;
 
-        MPI_Cart_shift(global_comm,i,1,&rank_left,&rank_right);
+        MPI_Cart_shift(this->global_comm,i,1,&rank_left,&rank_right);
 
         // collect work from right neighbor plane
         MPI_Request sreq, rreq;
         MPI_Status sstat, rstat;
 
-        MPI_Irecv(&remote_work,1,mpi_data_type_W,rank_right,0,global_comm,&rreq);
-        MPI_Isend(&work_local_plane,1,mpi_data_type_W,rank_left,0,global_comm,&sreq);
+        MPI_Irecv(&remote_work,1,mpi_data_type_W,rank_right,0,this->global_comm,&rreq);
+        MPI_Isend(&work_local_plane,1,mpi_data_type_W,rank_left,0,this->global_comm,&sreq);
         MPI_Wait(&sreq,&sstat);
         MPI_Wait(&rreq,&rstat);
 
         // collect size in dimension from right neighbor plane
 
-        size = vertices[dimension+i] - vertices[i];
+        local_size = vertices.at(1).x(i) - vertices.at(0).x(i);
         
-        MPI_Irecv(&remote_size,1,mpi_data_type_T,rank_right,0,global_comm,&rreq);
-        MPI_Isend(&size,1,mpi_data_type_T,rank_left,0,global_comm,&sreq);
+        MPI_Irecv(&remote_size,1,mpi_data_type_T,rank_right,0,this->global_comm,&rreq);
+        MPI_Isend(&local_size,1,mpi_data_type_T,rank_left,0,this->global_comm,&sreq);
         MPI_Wait(&sreq,&sstat);
         MPI_Wait(&rreq,&rstat);
 
@@ -285,41 +197,42 @@ template<class T, class W> void ALL_Tensor_LB<T,W>::balance()
         // *_r = * of neighbor (remote)
         // *_l = * of local domain
         T shift = (T)0;
-        if (rank_right != MPI_PROC_NULL && local_coords[i] != global_dims[i]-1 && !( remote_work == (T)0 && work_local_plane == (T)0) )
-            shift = 1.0 / gamma * 0.5 * (remote_work - work_local_plane) / (remote_work + work_local_plane) * (size + remote_size);
-
-        if (shift < 0.0)
+        if (rank_right != MPI_PROC_NULL && this->local_coords.at(i) != this->global_dims.at(i)-1 && !( remote_work == (T)0 && work_local_plane == (T)0) )
+            shift = 1.0 / this->get_gamma() * 0.5 * (remote_work - work_local_plane) / (remote_work + work_local_plane) * (local_size + remote_size);
+
+        // determine a maximum move in order to avoid overlapping borders and to guarantee
+        // a stable shift of the domain borders (avoid the loss of a domain due to too large
+        // shifts of its borders [no crossing of borders])
+        T maxmove;
+        if (shift > 0.0) 
+            maxmove = 0.49*(remote_size-this->min_size[i]);
+        else 
+            maxmove = 0.49*(local_size-this->min_size[i]);
+
+        if (std::fabs(shift) > maxmove)
         {
-            if (abs(shift) > 0.45 * size) shift = -0.45 * size;
+            shift = (shift < 0.0)?-maxmove:maxmove;
         }
-        else
-        {
-            if (abs(shift) > 0.45 * remote_size) shift = 0.45 * remote_size;
-        }
-
 
         // send shift to right neighbors
         T remote_shift = (T)0;
         
-        MPI_Irecv(&remote_shift,1,mpi_data_type_T,rank_left,0,global_comm,&rreq);
-        MPI_Isend(&shift,1,mpi_data_type_T,rank_right,0,global_comm,&sreq);
+        MPI_Irecv(&remote_shift,1,mpi_data_type_T,rank_left,0,this->global_comm,&rreq);
+        MPI_Isend(&shift,1,mpi_data_type_T,rank_right,0,this->global_comm,&sreq);
         MPI_Wait(&sreq,&sstat);
         MPI_Wait(&rreq,&rstat);
 
-        // for now: test case for simple program
-
         // if a left neighbor exists: shift left border 
-        if (rank_left != MPI_PROC_NULL && local_coords[i] != 0)
-            shifted_vertices[i] = vertices[i] + remote_shift;
+        if (rank_left != MPI_PROC_NULL && this->local_coords[i] != 0)
+            this->shifted_vertices.at(0).set_coordinate(i, vertices.at(0).x(i) + remote_shift);
         else
-            shifted_vertices[i] = vertices[i];
+            this->shifted_vertices.at(0).set_coordinate(i, vertices.at(0).x(i));
 
         // if a right neighbor exists: shift right border
-        if (rank_right != MPI_PROC_NULL && local_coords[i] != global_dims[i]-1)
-            shifted_vertices[dimension+i] = vertices[dimension+i] + shift;
+        if (rank_right != MPI_PROC_NULL && this->local_coords[i] != this->global_dims[i]-1)
+            this->shifted_vertices.at(1).set_coordinate(i, vertices.at(1).x(i) + shift);
         else
-            shifted_vertices[dimension+i] = vertices[dimension+i];
-
+            this->shifted_vertices.at(1).set_coordinate(i, vertices.at(1).x(i));
     }
 }
 
@@ -331,6 +244,6 @@ template<class T, class W> void ALL_Tensor_LB<T,W>::get_neighbors(std::vector<in
 // provide list of neighbors in each direction
 template<class T, class W> void ALL_Tensor_LB<T,W>::get_neighbors(int** ret)
 {
-    *ret = n_neighbors;
+    *ret = n_neighbors.data();
 }
 #endif
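
Illustration (a minimal sketch, not part of the patch; the function and variable names are placeholders): the setup() routines above require a cartesian communicator and split it into one sub-communicator per coordinate direction, so that all ranks sharing a coordinate in that direction can reduce their work together.

    #include <mpi.h>
    #include <vector>

    void build_plane_communicators(MPI_Comm cart_comm, int dim,
                                   std::vector<MPI_Comm>& plane_comms)
    {
        std::vector<int> dims(dim), periods(dim), coords(dim);
        MPI_Cart_get(cart_comm, dim, dims.data(), periods.data(), coords.data());

        plane_comms.resize(dim);
        for (int i = 0; i < dim; ++i)
        {
            // ranks with the same coordinate in direction i share one communicator
            MPI_Comm_split(cart_comm, coords[i], 0, &plane_comms[i]);
        }
    }
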
diff --git a/include/ALL_Unstructured.hpp b/include/ALL_Unstructured.hpp
index f6797558361ad1efaec0af6fa450ead8096b8f1a..76cb14a327bdcababa26b1d08451d8ccb5a81fe7 100644
--- a/include/ALL_Unstructured.hpp
+++ b/include/ALL_Unstructured.hpp
@@ -67,103 +67,57 @@ POSSIBILITY OF SUCH DAMAGE.
 #include <cmath>
 #include "ALL_CustomExceptions.hpp"
 #include "ALL_Point.hpp"
+#include "ALL_LB.hpp"
 
 // T: data type of vertices used in the balancer
 // W: data type of the work used in the balancer
-template <class T, class W> class ALL_Unstructured_LB
+template <class T, class W> class ALL_Unstructured_LB : public ALL_LB<T,W>
 {
     public:
         ALL_Unstructured_LB() {}
-        ALL_Unstructured_LB(int d, W w, T g) : dimension(d),
-                                         work(w), 
-                                         gamma(g)
+        ALL_Unstructured_LB(int d, W w, T g) : 
+            ALL_LB<T,W>(d, g)
         {
+            this->set_work(w);
             // need the lower and upper bounds
             // of the domain for the tensor-based
             // load-balancing scheme
-            n_vertices = (int)std::pow(2,dimension);
-            vertices = new std::vector<ALL_Point<T>>(n_vertices);
-            vertex_rank = new int[n_vertices];
-            shifted_vertices = new std::vector<ALL_Point<T>>(n_vertices);
-            // global dimension of the system in each coordinate
-            global_dims = new int[dimension];
-            // coordinates of the local domain in the system
-            local_coords = new int[dimension];
-            // periodicity of the MPI communicator / system
-            periodicity = new int[dimension];
-
-            // array of MPI communicators for each vertex (2 in 1D, 4 in 2D, 8 in 3D)
-            communicators = new MPI_Comm[n_vertices];
+            vertex_rank.resize(8);
         }
 
-        ~ALL_Unstructured_LB(); 
-
-        void set_vertices(T*);
-        void set_vertices(std::vector<ALL_Point<T>>&);
+        ~ALL_Unstructured_LB() override;
 
         // setup communicators for the exchange of work in each domain
-        void setup(MPI_Comm);
+        void setup() override;
 
         // do a load-balancing step
-        virtual void balance();
+        void balance(int step) override;
 
         // getter for variables (passed by reference to avoid
         // direct access to private members of the object)
 
-        // getter for base vertices
-        void get_vertices(T*);
-        // getter for shifted vertices
-        void get_shifted_vertices(T*);
-        // getter for shifted vertices
-        void get_shifted_vertices(std::vector<ALL_Point<T>>&);
-
         // neighbors
         // provide list of neighbors
-        virtual void get_neighbors(std::vector<int>&);
+        void get_neighbors(std::vector<int>&) override;
         // provide list of neighbors in each direction
-        virtual void get_neighbors(int**);
-
+        void get_neighbors(int**) override;
 
+        // empty override for setting method-specific data
+        virtual void set_data(void*) override {}
+        
     private:
-        // dimension of the system
-        int dimension;
-
-        // number of vertices (based on dimension)
-        int n_vertices;
-
-        // shift cooeficient
-        T gamma;
-
-        // work per domain
-        W work;
-
-        // vertices
-        std::vector<ALL_Point<T>>* vertices; 
-
-        // vertices after load-balancing shift
-        std::vector<ALL_Point<T>>* shifted_vertices; 
-
         // MPI values
-        // global communicator
-        MPI_Comm global_comm;
-        // rank of the local domain in the communicator
-        int local_rank;
+
         // rank(s) in the communicators of the vertices
-        int* vertex_rank;
-        // global dimension of the system in each coordinate
-        int* global_dims;
-        // coordinates of the local domain in the system
-        int* local_coords;
-        // periodicity of the MPI communicator / system
-        int* periodicity;
-        
+        std::vector<int> vertex_rank;
+
         // type for MPI communication
         MPI_Datatype mpi_data_type_T;
         MPI_Datatype mpi_data_type_W;
 
         // array of MPI communicators for each direction (to collect work
         // on each plane)
-        MPI_Comm* communicators;
+        std::vector<MPI_Comm> communicators;
 
         // list of neighbors
         std::vector<int> neighbors;
@@ -171,60 +125,13 @@ template <class T, class W> class ALL_Unstructured_LB
         // find neighbors (more sophisticated than for tensor-product
         // variant of LB)
         void find_neighbors();
+        
+        // number of vertices (not necessarily the same for all domains)
+        int n_vertices;
 };
 
 template <class T, class W> ALL_Unstructured_LB<T,W>::~ALL_Unstructured_LB()
 {
-    if (vertices) delete vertices;
-    if (vertex_rank) delete vertex_rank;
-    if (shifted_vertices) delete shifted_vertices;
-    if (global_dims) delete global_dims;
-    if (local_coords) delete local_coords;
-    if (periodicity) delete periodicity;
-    if (communicators) delete communicators;
-}
-
-template <class T, class W> void ALL_Unstructured_LB<T,W>::get_vertices(T* result)
-{
-    for (int i = 0; i < vertices->size()*dimension; ++i)
-    {
-        result[i] = vertices->at(i);
-    }
-}
-
-template <class T, class W> void ALL_Unstructured_LB<T,W>::get_shifted_vertices(T* result)
-{
-    for (int i = 0; i < vertices->size(); ++i)
-    {
-        for (int j = 0; j < dimension; ++j)
-        {
-           result[i*dimension+j] = shifted_vertices->at(i).x(j);
-        }
-    }
-}
-
-template <class T, class W> void ALL_Unstructured_LB<T,W>::get_shifted_vertices(std::vector<ALL_Point<T>>& result)
-{
-    for (int i = 0; i < shifted_vertices->size(); ++i)
-    {
-        result.at(i) = shifted_vertices->at(i);
-    }
-}
-
-// set the actual vertices (unsafe due to array copy)
-template <class T, class W> void ALL_Unstructured_LB<T,W>::set_vertices(T* v)
-{
-    for (int i = 0; i < vertices->size()*dimension; ++i)
-    {
-        vertices->at(i) = v[i];
-    }
-}
-
-// set the actual vertices 
-template <class T, class W> void ALL_Unstructured_LB<T,W>::set_vertices(std::vector<ALL_Point<T>>& v)
-{
-    // TODO: check if size(v) = 2^dim 
-    *vertices = v;
 }
 
 // provide list of neighbors
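The now-empty destructor above reflects the switch from manually allocated arrays to std::vector members, so there is nothing left to delete and raw-pointer getters can simply expose .data(). A tiny illustration of the pattern (hypothetical class, not the library code):

    #include <vector>

    // Sketch: vector members own their storage, so the destructor can stay
    // empty and a raw-pointer getter just hands out .data().
    class NeighborList
    {
        public:
            void set(std::vector<int> n) { neighbors = std::move(n); }
            // same calling convention as get_neighbors(int**) above
            void get(int** ret) { *ret = neighbors.data(); }
            ~NeighborList() {}   // nothing to free, unlike the old new[]/delete pairs
        private:
            std::vector<int> neighbors;
    };

    int main()
    {
        NeighborList nl;
        nl.set({1, 2, 3});
        int* p = nullptr;
        nl.get(&p);
        return p[0];   // 1
    }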
@@ -238,10 +145,11 @@ template<class T, class W> void ALL_Unstructured_LB<T,W>::get_neighbors(int** re
     *ret = neighbors.data();
 }
 
-template <class T, class W> void ALL_Unstructured_LB<T,W>::setup(MPI_Comm comm)
+template <class T, class W> void ALL_Unstructured_LB<T,W>::setup()
 {
-    global_comm = comm;
-
+    n_vertices = this->vertices.size();
+    communicators = std::vector<MPI_Comm>(n_vertices);
+    
     // determine correct MPI data type for template T
     if (std::is_same<T,double>::value) mpi_data_type_T = MPI_DOUBLE;
     else if (std::is_same<T,float>::value) mpi_data_type_T = MPI_FLOAT;
@@ -271,11 +179,11 @@ template <class T, class W> void ALL_Unstructured_LB<T,W>::setup(MPI_Comm comm)
     }
 
     MPI_Group all;
-    MPI_Comm_group(global_comm,&all);
+    MPI_Comm_group(this->global_comm,&all);
 
     // check if communicator is cartesian
     int status;
-    MPI_Topo_test(global_comm, &status);
+    MPI_Topo_test(this->global_comm, &status);
 
     if (status != MPI_CART)
     {
@@ -286,11 +194,11 @@ template <class T, class W> void ALL_Unstructured_LB<T,W>::setup(MPI_Comm comm)
                 "Cartesian MPI communicator required, passed communicator is not cartesian");
     }
 
-    // get the local coordinates, periodicity and global size from the MPI communicator
-    MPI_Cart_get(global_comm, dimension, global_dims, periodicity, local_coords);
+    // get the local coordinates, periodicity and global size from the MPI communicator
+    MPI_Cart_get(this->global_comm, this->dimension, this->global_dims.data(), this->periodicity.data(), this->local_coords.data());
 
     // get the local rank from the MPI communicator
-    MPI_Cart_rank(global_comm, local_coords, &local_rank);
+    MPI_Cart_rank(this->global_comm, this->local_coords.data(), &this->local_rank);
 
     // groups required for new communicators
     MPI_Group groups[n_vertices];
@@ -298,7 +206,7 @@ template <class T, class W> void ALL_Unstructured_LB<T,W>::setup(MPI_Comm comm)
     int processes[n_vertices][n_vertices];
 
     // shifted local coordinates to find neighboring processes
-    int shifted_coords[dimension];
+    int shifted_coords[this->dimension];
 
     /*
 
@@ -313,16 +221,16 @@ template <class T, class W> void ALL_Unstructured_LB<T,W>::setup(MPI_Comm comm)
         for (int x = 0; x < 2; ++x)
         {
             // group / communicator for vertex 0
-            shifted_coords[0] = local_coords[0] - 1 + x;
+            shifted_coords[0] = this->local_coords.at(0) - 1 + x;
 
-            MPI_Cart_rank(global_comm, shifted_coords, &(processes[x][0])); 
+            MPI_Cart_rank(this->global_comm, shifted_coords, &(processes[x][0])); 
 
-            shifted_coords[0] = local_coords[0] + x;
+            shifted_coords[0] = this->local_coords.at(0) + x;
 
-            MPI_Cart_rank(global_comm, shifted_coords, &(processes[x][1])); 
+            MPI_Cart_rank(this->global_comm, shifted_coords, &(processes[x][1])); 
 
             MPI_Group_incl(all, 2, &processes[x][0], &groups[x]);
-            MPI_Comm_create(global_comm, groups[x], &communicators[x]);
+            MPI_Comm_create(this->global_comm, groups[x], &communicators[x]);
 
             // get local rank in vertex communicator
             MPI_Comm_rank(communicators[x],&vertex_rank[x]);
@@ -343,28 +251,28 @@ template <class T, class W> void ALL_Unstructured_LB<T,W>::setup(MPI_Comm comm)
             {
                 int vertex = 2 * y + x;
 
-                shifted_coords[0] = local_coords[0] - 1 + x;
-                shifted_coords[1] = local_coords[1] - 1 + y;
+                shifted_coords[0] = this->local_coords.at(0) - 1 + x;
+                shifted_coords[1] = this->local_coords.at(1) - 1 + y;
 
-                MPI_Cart_rank(global_comm, shifted_coords, &(processes[vertex][0])); 
+                MPI_Cart_rank(this->global_comm, shifted_coords, &(processes[vertex][0])); 
 
-                shifted_coords[0] = local_coords[0] + x;
-                shifted_coords[1] = local_coords[1] - 1 + y;
+                shifted_coords[0] = this->local_coords.at(0) + x;
+                shifted_coords[1] = this->local_coords.at(1) - 1 + y;
 
-                MPI_Cart_rank(global_comm, shifted_coords, &(processes[vertex][1])); 
+                MPI_Cart_rank(this->global_comm, shifted_coords, &(processes[vertex][1])); 
 
-                shifted_coords[0] = local_coords[0] - 1 + x;
-                shifted_coords[1] = local_coords[1] + y;
+                shifted_coords[0] = this->local_coords.at(0) - 1 + x;
+                shifted_coords[1] = this->local_coords.at(1) + y;
 
-                MPI_Cart_rank(global_comm, shifted_coords, &(processes[vertex][2])); 
+                MPI_Cart_rank(this->global_comm, shifted_coords, &(processes[vertex][2])); 
 
-                shifted_coords[0] = local_coords[0] + x;
-                shifted_coords[1] = local_coords[1] + y;
+                shifted_coords[0] = this->local_coords.at(0) + x;
+                shifted_coords[1] = this->local_coords.at(1) + y;
 
-                MPI_Cart_rank(global_comm, shifted_coords, &(processes[vertex][3])); 
+                MPI_Cart_rank(this->global_comm, shifted_coords, &(processes[vertex][3])); 
 
                 MPI_Group_incl(all, 4, &processes[vertex][0], &groups[vertex]);
-                MPI_Comm_create(global_comm, groups[vertex], &communicators[vertex]);
+                MPI_Comm_create(this->global_comm, groups[vertex], &communicators[vertex]);
 
                 // get local rank in vertex communicator
                 MPI_Comm_rank(communicators[vertex],&vertex_rank[vertex]);
@@ -392,56 +300,56 @@ template <class T, class W> void ALL_Unstructured_LB<T,W>::setup(MPI_Comm comm)
                     int vertex = 4 * z + 2 * y + x;
 
                     // group / communicator for vertex 0,2,4,6
-                    shifted_coords[0] = local_coords[0] - 1 + x;
-                    shifted_coords[1] = local_coords[1] - 1 + y;
-                    shifted_coords[2] = local_coords[2] - 1 + z;
+                    shifted_coords[0] = this->local_coords.at(0) - 1 + x;
+                    shifted_coords[1] = this->local_coords.at(1) - 1 + y;
+                    shifted_coords[2] = this->local_coords.at(2) - 1 + z;
 
-                    MPI_Cart_rank(global_comm, shifted_coords, &(processes[vertex][0])); 
+                    MPI_Cart_rank(this->global_comm, shifted_coords, &(processes[vertex][0])); 
 
-                    shifted_coords[0] = local_coords[0] + x;
-                    shifted_coords[1] = local_coords[1] - 1 + y;
-                    shifted_coords[2] = local_coords[2] - 1 + z;
+                    shifted_coords[0] = this->local_coords.at(0) + x;
+                    shifted_coords[1] = this->local_coords.at(1) - 1 + y;
+                    shifted_coords[2] = this->local_coords.at(2) - 1 + z;
 
-                    MPI_Cart_rank(global_comm, shifted_coords, &(processes[vertex][1])); 
+                    MPI_Cart_rank(this->global_comm, shifted_coords, &(processes[vertex][1])); 
 
-                    shifted_coords[0] = local_coords[0] - 1 + x;
-                    shifted_coords[1] = local_coords[1] + y;
-                    shifted_coords[2] = local_coords[2] - 1 + z;
+                    shifted_coords[0] = this->local_coords.at(0) - 1 + x;
+                    shifted_coords[1] = this->local_coords.at(1) + y;
+                    shifted_coords[2] = this->local_coords.at(2) - 1 + z;
 
-                    MPI_Cart_rank(global_comm, shifted_coords, &(processes[vertex][2])); 
+                    MPI_Cart_rank(this->global_comm, shifted_coords, &(processes[vertex][2])); 
 
-                    shifted_coords[0] = local_coords[0] + x;
-                    shifted_coords[1] = local_coords[1] + y;
-                    shifted_coords[2] = local_coords[2] - 1 + z;
+                    shifted_coords[0] = this->local_coords.at(0) + x;
+                    shifted_coords[1] = this->local_coords.at(1) + y;
+                    shifted_coords[2] = this->local_coords.at(2) - 1 + z;
 
-                    MPI_Cart_rank(global_comm, shifted_coords, &(processes[vertex][3])); 
+                    MPI_Cart_rank(this->global_comm, shifted_coords, &(processes[vertex][3])); 
 
-                    shifted_coords[0] = local_coords[0] - 1 + x;
-                    shifted_coords[1] = local_coords[1] - 1 + y;
-                    shifted_coords[2] = local_coords[2] + z;
+                    shifted_coords[0] = this->local_coords.at(0) - 1 + x;
+                    shifted_coords[1] = this->local_coords.at(1) - 1 + y;
+                    shifted_coords[2] = this->local_coords.at(2) + z;
 
-                    MPI_Cart_rank(global_comm, shifted_coords, &(processes[vertex][4])); 
+                    MPI_Cart_rank(this->global_comm, shifted_coords, &(processes[vertex][4])); 
 
-                    shifted_coords[0] = local_coords[0] + x;
-                    shifted_coords[1] = local_coords[1] - 1 + y;
-                    shifted_coords[2] = local_coords[2] + z;
+                    shifted_coords[0] = this->local_coords.at(0) + x;
+                    shifted_coords[1] = this->local_coords.at(1) - 1 + y;
+                    shifted_coords[2] = this->local_coords.at(2) + z;
 
-                    MPI_Cart_rank(global_comm, shifted_coords, &(processes[vertex][5])); 
+                    MPI_Cart_rank(this->global_comm, shifted_coords, &(processes[vertex][5])); 
 
-                    shifted_coords[0] = local_coords[0] - 1 + x;
-                    shifted_coords[1] = local_coords[1] + y;
-                    shifted_coords[2] = local_coords[2] + z;
+                    shifted_coords[0] = this->local_coords.at(0) - 1 + x;
+                    shifted_coords[1] = this->local_coords.at(1) + y;
+                    shifted_coords[2] = this->local_coords.at(2) + z;
 
-                    MPI_Cart_rank(global_comm, shifted_coords, &(processes[vertex][6])); 
+                    MPI_Cart_rank(this->global_comm, shifted_coords, &(processes[vertex][6])); 
 
-                    shifted_coords[0] = local_coords[0] + x;
-                    shifted_coords[1] = local_coords[1] + y;
-                    shifted_coords[2] = local_coords[2] + z;
+                    shifted_coords[0] = this->local_coords.at(0) + x;
+                    shifted_coords[1] = this->local_coords.at(1) + y;
+                    shifted_coords[2] = this->local_coords.at(2) + z;
 
-                    MPI_Cart_rank(global_comm, shifted_coords, &(processes[vertex][7])); 
+                    MPI_Cart_rank(this->global_comm, shifted_coords, &(processes[vertex][7])); 
 
                     MPI_Group_incl(all, 8, &processes[vertex][0], &groups[vertex]);
-                    MPI_Comm_create(global_comm, groups[vertex], &communicators[vertex]);
+                    MPI_Comm_create(this->global_comm, groups[vertex], &communicators[vertex]);
 
                     // get local rank in vertex communicator
                     MPI_Comm_rank(communicators[vertex],&vertex_rank[vertex]);
@@ -450,7 +358,7 @@ template <class T, class W> void ALL_Unstructured_LB<T,W>::setup(MPI_Comm comm)
         }
         for (int i = 0; i < 8; ++i)
         {
-            if (i == local_rank)
+            if (i == this->local_rank)
             {
                 std::cout << "local rank: " << i << " " << std::endl;
                 for (int k = 0; k < 8; ++k)
@@ -477,56 +385,66 @@ template <class T, class W> void ALL_Unstructured_LB<T,W>::setup(MPI_Comm comm)
     }
     */
 
-    int dim_vert[dimension];
 
-    for (int d = 0; d < dimension; ++d)
-    {
-        dim_vert[d] = global_dims[d];
-    }
+#ifdef ALL_DEBUG_ENABLED
+    MPI_Barrier(this->global_comm);
+    if (this->local_rank == 0) std::cout << "ALL_Unstructured<T,W>::setup() preparing communicators..." << std::endl;    
+#endif    
+    std::vector<int> dim_vert(this->global_dims);
 
     MPI_Comm tmp_comm;
     int own_vertex;
 
     for (int i = 0; i < n_vertices; ++i)
-        communicators[i] = MPI_COMM_SELF;
-
-    for (int iz = 0; iz < dim_vert[2]; ++iz)
+        communicators.at(i) = MPI_COMM_SELF;
+
+#ifdef ALL_DEBUG_ENABLED
+    MPI_Barrier(this->global_comm);
+    if (this->local_rank == 0) std::cout << "ALL_Unstructured<T,W>::setup() computing communicators..." << std::endl;
+    std::cout << "DEBUG: " << " rank: " << this->local_rank
+                           << " dim_vert: " << dim_vert.at(0) << " " << dim_vert.at(1) << " " << dim_vert.at(2)
+                           << " size(local_coords): " << this->local_coords.size() << " "
+                           << " size(global_dims):  " << this->global_dims.size()
+                           << " size(vertex_rank):  " << vertex_rank.size()
+                           << std::endl;
+#endif    
+    for (int iz = 0; iz < dim_vert.at(2); ++iz)
     {
-        for (int iy = 0; iy < dim_vert[1]; ++iy)
+        for (int iy = 0; iy < dim_vert.at(1); ++iy)
         {
-            for (int ix = 0; ix < dim_vert[0]; ++ix)
+            for (int ix = 0; ix < dim_vert.at(0); ++ix)
             {   
-                if (ix == ( ( local_coords[0] + 0 ) % global_dims[0])  &&
-                    iy == ( ( local_coords[1] + 0 ) % global_dims[1])  &&
-                    iz == ( ( local_coords[2] + 0 ) % global_dims[2]) )
+                if (ix == ( ( this->local_coords.at(0) + 0 ) % this->global_dims.at(0))  &&
+                    iy == ( ( this->local_coords.at(1) + 0 ) % this->global_dims.at(1))  &&
+                    iz == ( ( this->local_coords.at(2) + 0 ) % this->global_dims.at(2)) )
                     own_vertex = 0;
-                else if (ix == ( ( local_coords[0] + 1 ) % global_dims[0])  &&
-                         iy == ( ( local_coords[1] + 0 ) % global_dims[1])  &&
-                         iz == ( ( local_coords[2] + 0 ) % global_dims[2]) )
+                else if (ix == ( ( this->local_coords.at(0) + 1 ) % this->global_dims.at(0))  &&
+                         iy == ( ( this->local_coords.at(1) + 0 ) % this->global_dims.at(1))  &&
+                         iz == ( ( this->local_coords.at(2) + 0 ) % this->global_dims.at(2)) )
                     own_vertex = 1;
-                else if (ix == ( ( local_coords[0] + 0 ) % global_dims[0])  &&
-                         iy == ( ( local_coords[1] + 1 ) % global_dims[1])  &&
-                         iz == ( ( local_coords[2] + 0 ) % global_dims[2]) )
+                else if (ix == ( ( this->local_coords.at(0) + 0 ) % this->global_dims.at(0))  &&
+                         iy == ( ( this->local_coords.at(1) + 1 ) % this->global_dims.at(1))  &&
+                         iz == ( ( this->local_coords.at(2) + 0 ) % this->global_dims.at(2)) )
                     own_vertex = 2;
-                else if (ix == ( ( local_coords[0] + 1 ) % global_dims[0])  &&
-                         iy == ( ( local_coords[1] + 1 ) % global_dims[1])  &&
-                         iz == ( ( local_coords[2] + 0 ) % global_dims[2]) )
+                else if (ix == ( ( this->local_coords.at(0) + 1 ) % this->global_dims.at(0))  &&
+                         iy == ( ( this->local_coords.at(1) + 1 ) % this->global_dims.at(1))  &&
+                         iz == ( ( this->local_coords.at(2) + 0 ) % this->global_dims.at(2)) )
                     own_vertex = 3;
-                else if (ix == ( ( local_coords[0] + 0 ) % global_dims[0])  &&
-                         iy == ( ( local_coords[1] + 0 ) % global_dims[1])  &&
-                         iz == ( ( local_coords[2] + 1 ) % global_dims[2]) )
+                else if (ix == ( ( this->local_coords.at(0) + 0 ) % this->global_dims.at(0))  &&
+                         iy == ( ( this->local_coords.at(1) + 0 ) % this->global_dims.at(1))  &&
+                         iz == ( ( this->local_coords.at(2) + 1 ) % this->global_dims.at(2)) )
                     own_vertex = 4;
-                else if (ix == ( ( local_coords[0] + 1 ) % global_dims[0])  &&
-                         iy == ( ( local_coords[1] + 0 ) % global_dims[1])  &&
-                         iz == ( ( local_coords[2] + 1 ) % global_dims[2]) )
+                else if (ix == ( ( this->local_coords.at(0) + 1 ) % this->global_dims.at(0))  &&
+                         iy == ( ( this->local_coords.at(1) + 0 ) % this->global_dims.at(1))  &&
+                         iz == ( ( this->local_coords.at(2) + 1 ) % this->global_dims.at(2)) )
                     own_vertex = 5;
-                else if (ix == ( ( local_coords[0] + 0 ) % global_dims[0])  &&
-                         iy == ( ( local_coords[1] + 1 ) % global_dims[1])  &&
-                         iz == ( ( local_coords[2] + 1 ) % global_dims[2]) )
+                else if (ix == ( ( this->local_coords.at(0) + 0 ) % this->global_dims.at(0))  &&
+                         iy == ( ( this->local_coords.at(1) + 1 ) % this->global_dims.at(1))  &&
+                         iz == ( ( this->local_coords.at(2) + 1 ) % this->global_dims.at(2)) )
                     own_vertex = 6;
-                else if (ix == ( ( local_coords[0] + 1 ) % global_dims[0])  &&
-                         iy == ( ( local_coords[1] + 1 ) % global_dims[1])  &&
-                         iz == ( ( local_coords[2] + 1 ) % global_dims[2]) )
+                else if (ix == ( ( this->local_coords.at(0) + 1 ) % this->global_dims.at(0))  &&
+                         iy == ( ( this->local_coords.at(1) + 1 ) % this->global_dims.at(1))  &&
+                         iz == ( ( this->local_coords.at(2) + 1 ) % this->global_dims.at(2)) )
                     own_vertex = 7;
                 else 
                     own_vertex = -1;
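The if/else cascade above assigns to each point (ix,iy,iz) of the vertex grid the local vertex number dx + 2*dy + 4*dz of the domain whose (periodically wrapped) coordinates match. A compact equivalent as a hypothetical stand-alone helper, assuming more than one domain per direction, just to make the indexing explicit:

    #include <array>
    #include <iostream>

    // Sketch (hypothetical helper, not the library code): which local vertex of
    // this domain, if any, is the vertex-grid point (ix,iy,iz)?  The point
    // belongs to this domain as vertex dx + 2*dy + 4*dz when, in each direction,
    // it equals the domain coordinate shifted by dx/dy/dz in {0,1}, wrapped
    // periodically.  Returns -1 if the point is not a corner of this domain.
    int own_vertex(const std::array<int,3>& coords, const std::array<int,3>& dims,
                   int ix, int iy, int iz)
    {
        const std::array<int,3> p = {ix, iy, iz};
        int vertex = 0;
        for (int d = 0; d < 3; ++d)
        {
            // offset of the grid point relative to the lower corner of the domain
            const int off = (p[d] - coords[d] + dims[d]) % dims[d];
            if (off != 0 && off != 1) return -1;   // not one of our eight corners
            vertex += off << d;                    // dx + 2*dy + 4*dz
        }
        return vertex;
    }

    int main()
    {
        // domain (1,0,0) in a 2x2x2 grid: grid point (0,1,1) is its vertex 7
        std::cout << own_vertex({1,0,0}, {2,2,2}, 0, 1, 1) << std::endl;
    }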
@@ -535,63 +453,67 @@ template <class T, class W> void ALL_Unstructured_LB<T,W>::setup(MPI_Comm comm)
                 int result;
                 if (own_vertex >= 0)
                 {
-                    vertex_rank[own_vertex] = 7 - own_vertex;
-                    result = MPI_Comm_split(global_comm,1,vertex_rank[own_vertex],&communicators[own_vertex]);
-                    MPI_Comm_rank(communicators[own_vertex],&vertex_rank[own_vertex]);
+                    vertex_rank.at(own_vertex) = 7 - own_vertex;
+                    result = MPI_Comm_split(this->global_comm,1,vertex_rank.at(own_vertex),&communicators.at(own_vertex));
+                    MPI_Comm_rank(communicators.at(own_vertex),&vertex_rank.at(own_vertex));
                 }
                 else
                 {
-                    result = MPI_Comm_split(global_comm,0,0,&tmp_comm);
+                    result = MPI_Comm_split(this->global_comm,0,0,&tmp_comm);
                 }
                 if (result != MPI_SUCCESS)
                 {
-                    std::cout << "RANK: " << local_rank << " ERROR in MPI_Comm_Split" << std::endl;
+                    std::cout << "RANK: " << this->local_rank << " ERROR in MPI_Comm_split" << std::endl;
                 }
             }
         }
+#ifdef ALL_DEBUG_ENABLED
+    MPI_Barrier(this->global_comm);
+    if (this->local_rank == 0) std::cout << "ALL_Unstructured<T,W>::setup() finished computing communicators..." << std::endl;    
+#endif    
     }
     for (int vertex = 0; vertex < n_vertices; ++vertex)
-        MPI_Comm_rank(communicators[vertex],&vertex_rank[vertex]);
+        MPI_Comm_rank(communicators.at(vertex),&vertex_rank.at(vertex));
 
 }
 
 // TODO: periodic boundary conditions (would require size of the system)
 //       for now: do not change the vertices along the edge of the system
 
-template <class T, class W> void ALL_Unstructured_LB<T,W>::balance()
+template <class T, class W> void ALL_Unstructured_LB<T,W>::balance(int /*step*/)
 {
     // geometrical center and work of each neighbor for each vertex
     // work is cast to vertex data type, since in the end a shift
     // of the vertex is computed
-    T vertex_info[n_vertices*n_vertices*(dimension+2)];
-    T center[dimension];
+    T vertex_info[n_vertices*n_vertices*(this->dimension+2)];
+    T center[this->dimension];
 
     // local geometric center
-    T local_info[(dimension+2) * n_vertices];
+    T local_info[(this->dimension+2) * n_vertices];
 
-    for (int i = 0; i < n_vertices*n_vertices*(dimension+2); ++i)
+    for (int i = 0; i < n_vertices*n_vertices*(this->dimension+2); ++i)
         vertex_info[i] = -1;
 
-    for (int d = 0; d < (dimension+2) * n_vertices; ++d)
+    for (int d = 0; d < (this->dimension+2) * n_vertices; ++d)
         local_info[d] = (T)0;
 
-    for (int d = 0; d < dimension; ++d)
+    for (int d = 0; d < this->dimension; ++d)
         center[d] = (T)0;
 
     // compute geometric center
     for (int v = 0; v < n_vertices; ++v)
     {
-        for (int d = 0; d < dimension; ++d)
+        for (int d = 0; d < this->dimension; ++d)
         {
-            center[d] += ( (vertices->at(v)).x(d) / (T)n_vertices );
+            center[d] += ( (this->vertices.at(v)).x(d) / (T)n_vertices );
         }
-        local_info[v * (dimension+2) + dimension] = (T)work;
+        local_info[v * (this->dimension+2) + this->dimension] = (T)this->work.at(0);
     }
     for (int v = 0; v < n_vertices; ++v)
     {
-        for (int d = 0; d < dimension; ++d)
+        for (int d = 0; d < this->dimension; ++d)
         {
-            local_info[v * (dimension+2) + d] = center[d] - (vertices->at(v)).x(d);
+            local_info[v * (this->dimension+2) + d] = center[d] - (this->vertices.at(v)).x(d);
         }
     }
 
@@ -665,32 +587,32 @@ template <class T, class W> void ALL_Unstructured_LB<T,W>::balance()
 
     // compute for each vertex the maximum movement towards the center
 
-    local_info[ 0 * (dimension+2) + dimension+1 ] =
-        distance( vertices, 0, 1, 2, 4, local_rank);
+    local_info[ 0 * (this->dimension+2) + this->dimension+1 ] =
+        distance( &(this->vertices), 0, 1, 2, 4, this->local_rank);
 
-    local_info[ 1 * (dimension+2) + dimension+1 ] =
-        distance( vertices, 1, 0, 3, 5, local_rank);
+    local_info[ 1 * (this->dimension+2) + this->dimension+1 ] =
+        distance( &(this->vertices), 1, 0, 3, 5, this->local_rank);
 
-    local_info[ 2 * (dimension+2) + dimension+1 ] =
-        distance( vertices, 2, 3, 0, 6, local_rank);
+    local_info[ 2 * (this->dimension+2) + this->dimension+1 ] =
+        distance( &(this->vertices), 2, 3, 0, 6, this->local_rank);
 
-    local_info[ 3 * (dimension+2) + dimension+1 ] =
-        distance( vertices, 3, 2, 1, 7, local_rank);
+    local_info[ 3 * (this->dimension+2) + this->dimension+1 ] =
+        distance( &(this->vertices), 3, 2, 1, 7, this->local_rank);
 
-    local_info[ 4 * (dimension+2) + dimension+1 ] =
-        distance( vertices, 4, 5, 6, 0, local_rank);
+    local_info[ 4 * (this->dimension+2) + this->dimension+1 ] =
+        distance( &(this->vertices), 4, 5, 6, 0, this->local_rank);
 
-    local_info[ 5 * (dimension+2) + dimension+1 ] =
-        distance( vertices, 5, 4, 7, 1, local_rank);
+    local_info[ 5 * (this->dimension+2) + this->dimension+1 ] =
+        distance( &(this->vertices), 5, 4, 7, 1, this->local_rank);
 
-    local_info[ 6 * (dimension+2) + dimension+1 ] =
-        distance( vertices, 6, 7, 4, 2, local_rank);
+    local_info[ 6 * (this->dimension+2) + this->dimension+1 ] =
+        distance( &(this->vertices), 6, 7, 4, 2, this->local_rank);
 
-    local_info[ 7 * (dimension+2) + dimension+1 ] =
-        distance( vertices, 7, 6, 5, 3, local_rank);
+    local_info[ 7 * (this->dimension+2) + this->dimension+1 ] =
+        distance( &(this->vertices), 7, 6, 5, 3, this->local_rank);
 
     /*
-    if (local_rank == 4)
+    if (this->local_rank == 4)
     {
         std::cout << "max dists: " 
                   << local_info[ 0 * (dimension+2) + dimension+1 ] << " "
@@ -712,45 +634,45 @@ template <class T, class W> void ALL_Unstructured_LB<T,W>::balance()
 
     for (int v = 0; v < n_vertices; ++v)
     {
-        MPI_Iallgather(&local_info[v*(dimension+2)],dimension+2,mpi_data_type_T,
-                       &vertex_info[v*n_vertices*(dimension+2)],dimension+2,mpi_data_type_T,
+        MPI_Iallgather(&local_info[v*(this->dimension+2)],this->dimension+2,mpi_data_type_T,
+                       &vertex_info[v*n_vertices*(this->dimension+2)],this->dimension+2,mpi_data_type_T,
                        communicators[v],&request[v]);
     }
     MPI_Waitall(n_vertices,request,status);
 
     // compute new position for vertex 7 (if not periodic)
     T total_work = (T)0;
-    T shift_vectors[dimension * n_vertices];
+    T shift_vectors[this->dimension * n_vertices];
 
     for (int v = 0; v < n_vertices; ++v)
     {
-        for (int d = 0; d < dimension; ++d)
+        for (int d = 0; d < this->dimension; ++d)
         {
-            shift_vectors[v * dimension + d] = (T)0;
+            shift_vectors[v * this->dimension + d] = (T)0;
         }
     }
 
     // get total work
     for (int v = 0; v < n_vertices; ++v)
     {
-        if (vertex_info[(n_vertices-1)*n_vertices+v*(dimension+2)+dimension] > 1e-6)
-            total_work += vertex_info[(n_vertices-1)*n_vertices+v*(dimension+2)+dimension];
+        if (vertex_info[(n_vertices-1)*n_vertices+v*(this->dimension+2)+this->dimension] > 1e-6)
+            total_work += vertex_info[(n_vertices-1)*n_vertices+v*(this->dimension+2)+this->dimension];
     }
 
     // determine shortest distance between vertex and a neighbor 
     T cdist = std::max( 
                         1e-6,
-                        vertex_info[(n_vertices-1)*(n_vertices*(dimension+2))+0*(dimension+2)+dimension+1]
+                        vertex_info[(n_vertices-1)*(n_vertices*(this->dimension+2))+0*(this->dimension+2)+this->dimension+1]
                       );
 
     for (int v = 1; v < n_vertices; ++v)
     {
-        if ( vertex_info[(n_vertices-1)*(n_vertices*(dimension+2))+v*(dimension+2)+dimension+1] > 0 )
+        if ( vertex_info[(n_vertices-1)*(n_vertices*(this->dimension+2))+v*(this->dimension+2)+this->dimension+1] > 0 )
         {
             cdist = std::min
                     (
                         cdist,
-                        vertex_info[(n_vertices-1)*(n_vertices*(dimension+2))+v*(dimension+2)+dimension+1]
+                        vertex_info[(n_vertices-1)*(n_vertices*(this->dimension+2))+v*(this->dimension+2)+this->dimension+1]
                     );
         }
     }
@@ -762,44 +684,44 @@ template <class T, class W> void ALL_Unstructured_LB<T,W>::balance()
     {
         for (int i = 0; i < n_vertices; ++i)
         {
-            for (int d = 0; d < dimension; ++d)
+            for (int d = 0; d < this->dimension; ++d)
             {
-                if (local_coords[d] != global_dims[d] - 1)
+                if (this->local_coords.at(d) != this->global_dims.at(d) - 1)
                 {
                     //if ( vertex_info[(n_vertices-1)*(n_vertices*(dimension+2))+i*(dimension+2)+dimension] - ( total_work / (T)n_vertices )  > 0)                    
                     //{
-                        shift_vectors[(n_vertices-1)*dimension + d] += 
-                            ( vertex_info[(n_vertices-1)*(n_vertices*(dimension+2))+i*(dimension+2)+dimension] - ( total_work / (T)n_vertices ) ) *
-                            vertex_info[(n_vertices-1)*(n_vertices*(dimension+2))+i*(dimension+2)+d]  
-                            / ( (T)2.0 * gamma * total_work * (T)n_vertices);
+                        shift_vectors[(n_vertices-1)*this->dimension + d] += 
+                            ( vertex_info[(n_vertices-1)*(n_vertices*(this->dimension+2))+i*(this->dimension+2)+this->dimension] - ( total_work / (T)n_vertices ) ) *
+                            vertex_info[(n_vertices-1)*(n_vertices*(this->dimension+2))+i*(this->dimension+2)+d]  
+                            / ( (T)2.0 * this->gamma * total_work * (T)n_vertices);
                     //}
                 }
             }
         }
         // distance of shift
         T shift = (T)0;
-        for (int d = 0; d < dimension; ++d)
-            shift +=  shift_vectors[(n_vertices-1)*dimension + d] * shift_vectors[(n_vertices-1)*dimension + d];
+        for (int d = 0; d < this->dimension; ++d)
+            shift +=  shift_vectors[(n_vertices-1)*this->dimension + d] * shift_vectors[(n_vertices-1)*this->dimension + d];
         shift = sqrt(shift);
 
         if (std::abs(shift) >= 1e-6 && shift > cdist)
         {
             // normalize length of shift vector to half the distance to the nearest center
-            for (int d = 0; d < dimension; ++d)
-                shift_vectors[(n_vertices-1)*dimension + d] *= cdist / shift;
+            for (int d = 0; d < this->dimension; ++d)
+                shift_vectors[(n_vertices-1)*this->dimension + d] *= cdist / shift;
         }
 
     }
 
     for (int i = 0; i < n_vertices; ++i)
     {
-        MPI_Ibcast(&shift_vectors[i*dimension],dimension,mpi_data_type_T,0,communicators[i],&request[i]);
+        MPI_Ibcast(&shift_vectors[i*this->dimension],this->dimension,mpi_data_type_T,0,communicators[i],&request[i]);
     }
     MPI_Waitall(n_vertices,request,status);
 
-    shifted_vertices = vertices;
+    this->shifted_vertices = this->vertices;
     /*
-    if (local_rank == 7)
+    if (this->local_rank == 7)
     {
         std::cout << "center: ";
         for (int d = 0; d < dimension; ++d)
@@ -811,18 +733,18 @@ template <class T, class W> void ALL_Unstructured_LB<T,W>::balance()
     */
     for (int v = 0; v < n_vertices; ++v)
     {
-        //if (local_rank == 4) std::cout << "vertex " << v << ": ";
-        for (int d = 0; d < dimension; ++d)
+        //if (this->local_rank == 4) std::cout << "vertex " << v << ": ";
+        for (int d = 0; d < this->dimension; ++d)
         {
-            shifted_vertices->at(v).set_coordinate(d, vertices->at(v).x(d) + shift_vectors[v * dimension + d]);
-            if (shifted_vertices->at(v).x(d) != shifted_vertices->at(v).x(d))
+            this->shifted_vertices.at(v).set_coordinate(d, this->vertices.at(v).x(d) + shift_vectors[v * this->dimension + d]);
+            if (this->shifted_vertices.at(v).x(d) != this->shifted_vertices.at(v).x(d))
             {
-                std::cout << local_rank << " " << v << " " << d << std::endl;
+                std::cout << this->local_rank << " " << v << " " << d << std::endl;
                 MPI_Abort(MPI_COMM_WORLD,-200);
             }
-            //if (local_rank == 4) std::cout << shifted_vertices->at(v).x(d) << " " << shift_vectors[v * dimension + d] << " ";
+            //if (this->local_rank == 4) std::cout << shifted_vertices->at(v).x(d) << " " << shift_vectors[v * dimension + d] << " ";
         }
-        //if (local_rank == 4) std::cout << std::endl;
+        //if (this->local_rank == 4) std::cout << std::endl;
     }
 }
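To summarise the balance step above in isolation: each shared corner is moved by a work-weighted average of the vectors from that corner to the participating domains' geometric centers, scaled by 1/(2*gamma), and the resulting displacement is capped by the shortest distance to a neighbouring center before being broadcast over the vertex communicator. A minimal sketch of that formula with plain doubles (hypothetical names, no MPI):

    #include <array>
    #include <cmath>
    #include <iostream>
    #include <vector>

    // Sketch: work-weighted shift of a shared vertex.  Each participating
    // domain i contributes
    //   (w_i - w_avg) * (center_i - vertex) / (2 * gamma * W_total * n)
    // and the resulting vector is capped at max_dist, the shortest distance to
    // a neighbouring center, so the vertex cannot jump past it.
    std::array<double,3> vertex_shift(const std::vector<std::array<double,3>>& center_offsets,
                                      const std::vector<double>& work,
                                      double gamma, double max_dist)
    {
        const std::size_t n = work.size();
        double total = 0.0;
        for (double w : work) total += w;

        std::array<double,3> shift = {0.0, 0.0, 0.0};
        for (std::size_t i = 0; i < n; ++i)
            for (int d = 0; d < 3; ++d)
                shift[d] += (work[i] - total / n) * center_offsets[i][d]
                            / (2.0 * gamma * total * n);

        double len = std::sqrt(shift[0]*shift[0] + shift[1]*shift[1] + shift[2]*shift[2]);
        if (len > max_dist)                 // cap at the nearest-center distance
            for (int d = 0; d < 3; ++d) shift[d] *= max_dist / len;
        return shift;
    }

    int main()
    {
        // two domains sharing the vertex: a heavily loaded one above it and a
        // lightly loaded one below it -> the vertex moves up
        std::vector<std::array<double,3>> offsets(2);
        offsets[0] = {0.0, 0.0,  1.0};
        offsets[1] = {0.0, 0.0, -1.0};
        std::vector<double> work = {3.0, 1.0};
        auto s = vertex_shift(offsets, work, 2.0, 0.5);
        std::cout << s[0] << " " << s[1] << " " << s[2] << std::endl;   // 0 0 0.0625
    }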
 
diff --git a/include/ALL_Voronoi.hpp b/include/ALL_Voronoi.hpp
index f79e2c3c7cfb51f1abc8e553dc7399e8256055d9..6c4b742e6870577c4c501ad94cf168856dcf4acf 100644
--- a/include/ALL_Voronoi.hpp
+++ b/include/ALL_Voronoi.hpp
@@ -48,90 +48,50 @@ POSSIBILITY OF SUCH DAMAGE.
 #include <algorithm>
 #include "ALL_CustomExceptions.hpp"
 #include "ALL_Point.hpp"
+#include "ALL_LB.hpp"
 
 #include "voro++.hh"
 
 // T: data type of vertices used in the balancer
 // W: data type of the work used in the balancer
-template <class T, class W> class ALL_Voronoi_LB
+template <class T, class W> class ALL_Voronoi_LB : public ALL_LB<T,W>
 {
     public:
         ALL_Voronoi_LB() {}
-        ALL_Voronoi_LB(int d, W w, T g) : dimension(d),
-                                         work(w), 
-                                         gamma(g)
+        ALL_Voronoi_LB(int d, W w, T g) : 
+            ALL_LB<T,W>(d,g)
         {
-            // only need the generator point
-            vertices = new T[2*dimension];
-            shifted_vertices = new T[dimension];
-            // set size for system size array
-            sys_size.resize(6);
-
-            loadbalancing_step = 0;
+            this->set_work(w);
         }
 
-        ~ALL_Voronoi_LB(); 
-
-        void set_vertices(T*);
-        void set_vertices(std::vector<ALL_Point<T>>&);
+        ~ALL_Voronoi_LB() override;
 
         voro::voronoicell vc;
 
         // setup communicators for the exchange of work in each domain
-        void setup(MPI_Comm);
+        void setup() override;
 
         // do a load-balancing step
-        virtual void balance();
+        virtual void balance(int step) override;
 
         // getter for variables (passed by reference to avoid
         // direct access to private members of the object)
 
-        // getter for base vertices
-        void get_vertices(T*);
-        // getter for shifted vertices
-        void get_shifted_vertices(T*);
-        // getter for shifted vertices
-        void get_shifted_vertices(std::vector<T>&);
-        // getter for shifted vertices
-        void get_shifted_vertices(std::vector<ALL_Point<T>>&);
-
         // neighbors
         // provide list of neighbors
-        virtual void get_neighbors(std::vector<int>&);
+        void get_neighbors(std::vector<int>&) override;
         // provide list of neighbors in each direction
-        virtual void get_neighbors(int**);
+        void get_neighbors(int**) override;
         // return generator points of the neighboring cells (to be able to recreate
         // local and neighboring cells)
-        virtual void get_neighbor_vertices(std::vector<T>&);
-
-        // set system size for periodic corrections
-        void set_sys_size(std::vector<T>&);
-
-        void set_step(int s) { loadbalancing_step = s; }
+        void get_neighbor_vertices(std::vector<T>&);
 
+        // empty override for setting method-specific data
+        virtual void set_data(void*) override {}
+        
     private:
-        // dimension of the system
-        int dimension;
-
-        // shift cooeficient
-        T gamma;
-
-        // work per domain
-        W work;
-
-        // vertices
-        T* vertices; 
-
-        // vertices after load-balancing shift
-        T* shifted_vertices; 
-
         // MPI values
-        // global communicator
-        MPI_Comm global_comm;
-        // rank of the local domain in the communicator
-        int local_rank;
-
-        
+      
         // type for MPI communication
         MPI_Datatype mpi_data_type_T;
         MPI_Datatype mpi_data_type_W;
@@ -146,95 +106,24 @@ template <class T, class W> class ALL_Voronoi_LB
         // collection of generator points
         std::vector<T> generator_points;
 
-        // size of system (for periodic boxes / systems)
-        std::vector<T> sys_size;
-
         // number of ranks in global communicator
         int n_domains;
-
-        int loadbalancing_step;
 };
 
 template <class T, class W> ALL_Voronoi_LB<T,W>::~ALL_Voronoi_LB()
 {
-    if (vertices) delete vertices;
-    if (shifted_vertices) delete shifted_vertices;
-}
-
-template <class T, class W> void ALL_Voronoi_LB<T,W>::get_vertices(T* result)
-{
-    for (int i = 0; i < dimension; ++i)
-    {
-        result[i] = vertices[i];
-    }
-}
-
-template <class T, class W> void ALL_Voronoi_LB<T,W>::get_shifted_vertices(T* result)
-{
-    for (int i = 0; i < dimension; ++i)
-    {
-        result[i] = shifted_vertices[i];
-    }
-}
-
-// getter for shifted vertices
-template <class T, class W> void ALL_Voronoi_LB<T,W>::get_shifted_vertices(std::vector<ALL_Point<T>>& result)
-{
-    ALL_Point<T> p(
-                    3,
-                    { shifted_vertices[0],
-                      shifted_vertices[1],
-                      shifted_vertices[2] }
-                  ); 
-    result.clear();
-    result.push_back(p);
-}
-
-template <class T, class W> void ALL_Voronoi_LB<T,W>::get_shifted_vertices(std::vector<T>& result)
-{
-    for (int i = 0; i < dimension; ++i)
-    {
-        result.at(i) = shifted_vertices[i];
-    }
-}
-
-// set the actual vertices (unsafe due to array copy)
-template <class T, class W> void ALL_Voronoi_LB<T,W>::set_vertices(T* v)
-{
-    for (int i = 0; i < 2*dimension; ++i)
-    {
-        vertices[i] = v[i];
-    }
-}
-
-// set the actual vertices
-template <class T, class W> void ALL_Voronoi_LB<T,W>::set_vertices(std::vector<ALL_Point<T>>& _v)
-{
-    for (int v = 0; v < 2; ++v)
-        for (int i = 0; i < dimension; ++i)
-        {
-            vertices[dimension * v + i] = _v.at(v).x(i);
-        }
-}
-
-template <class T, class W> void ALL_Voronoi_LB<T,W>::set_sys_size(std::vector<T>& ss)
-{
-    for (auto i = 0; i < sys_size.size(); ++i)
-    {
-        sys_size.at(i) = ss.at(i);
-    }
 }
 
 // setup routine for the tensor-based load-balancing scheme
 // requires: 
-//              global_comm (int): cartesian MPI communicator, from
+//              this->global_comm (MPI_Comm): cartesian MPI communicator, from
 //                                 which separate sub communicators
 //                                 are derived in order to represent
 //                                 each plane of domains in the system
-template <class T, class W> void ALL_Voronoi_LB<T,W>::setup(MPI_Comm comm)
+template <class T, class W> void ALL_Voronoi_LB<T,W>::setup()
 {
     // store global communicator
-    global_comm = comm; 
+    int dimension = this->get_dimension();
 
     // no special communicator required
 
@@ -244,8 +133,8 @@ template <class T, class W> void ALL_Voronoi_LB<T,W>::setup(MPI_Comm comm)
     //       of the local cell -> large-scale Voronoi grid creation
     //                            is too expensive to be done
     //                            every step
-    MPI_Comm_size(global_comm, &n_domains);
-    MPI_Comm_rank(global_comm, &local_rank);
+    MPI_Comm_size(this->global_comm, &n_domains);
+    MPI_Comm_rank(this->global_comm, &this->local_rank);
     generator_points.resize(n_domains*(2*dimension+1));
 
     // determine correct MPI data type for template T
@@ -262,32 +151,37 @@ template <class T, class W> void ALL_Voronoi_LB<T,W>::setup(MPI_Comm comm)
 
 }
 
-template<class T, class W> void ALL_Voronoi_LB<T,W>::balance()
+template<class T, class W> void ALL_Voronoi_LB<T,W>::balance(int loadbalancing_step)
 {
+    int dimension = this->get_dimension();
     // collect local information in array
     int info_length = 2*dimension+1;
     std::vector<T> local_info(info_length);
+    std::vector<ALL_Point<T>>& vertices = this->get_vertices();
+    std::vector<ALL_Point<T>>& shifted_vertices = this->get_shifted_vertices();
+    std::vector<W>& work = this->get_work();
     
-    for (auto d = 0; d < 2*dimension; ++d)
-    {
-        local_info.at(d) = vertices[d];
-    }
-    local_info.at(2*dimension) = (T)work;
+    for (auto i = 0; i < 2; ++i)
+        for (auto d = 0; d < dimension; ++d)
+        {
+            local_info.at(i * dimension + d) = vertices.at(i).x(d);
+        }
+    local_info.at(2*dimension) = (T)work.at(0);
 
     
     MPI_Allgather(local_info.data(), info_length, mpi_data_type_T, 
                   generator_points.data(), info_length, mpi_data_type_T,
-                  global_comm);
+                  this->global_comm);
 
     // create Voronoi-cells
     // TODO: check if system is periodic or not -> true / false!
     voro::container con_old(
-                sys_size.at(0),
-                sys_size.at(1),
-                sys_size.at(2),
-                sys_size.at(3),
-                sys_size.at(4),
-                sys_size.at(5),
+                this->sys_size.at(0),
+                this->sys_size.at(1),
+                this->sys_size.at(2),
+                this->sys_size.at(3),
+                this->sys_size.at(4),
+                this->sys_size.at(5),
                 ALL_VORONOI_SUBBLOCKS,
                 ALL_VORONOI_SUBBLOCKS,
                 ALL_VORONOI_SUBBLOCKS,
@@ -325,7 +219,7 @@ template<class T, class W> void ALL_Voronoi_LB<T,W>::balance()
 
     loadbalancing_step++;
 
-    if (local_rank == 0)
+    if (this->local_rank == 0)
     {
         con_old.draw_particles(ss_local_gp_gnu.str().c_str());
         con_old.draw_cells_gnuplot(ss_local_vc_gnu.str().c_str());
@@ -362,7 +256,7 @@ template<class T, class W> void ALL_Voronoi_LB<T,W>::balance()
                 cl_old.pos(pid,x,y,z,r);
                 if (i == 0)
                 {
-                    if (pid == local_rank)
+                    if (pid == this->local_rank)
                     {
                         con_old.compute_cell(c,cl_old);
                         c.neighbors(next_neighbors.at(idx));
@@ -394,7 +288,7 @@ template<class T, class W> void ALL_Voronoi_LB<T,W>::balance()
         std::sort(neighbors.begin(),neighbors.end());
         auto uniq_it = std::unique(neighbors.begin(), neighbors.end());
         neighbors.resize( std::distance(neighbors.begin(), uniq_it) );
-        neighbors.erase(std::remove(neighbors.begin(),neighbors.end(),local_rank),neighbors.end());
+        neighbors.erase(std::remove(neighbors.begin(),neighbors.end(),this->local_rank),neighbors.end());
         neighbors.erase(std::remove_if(neighbors.begin(),neighbors.end(),[](int x){return x < 0;}),neighbors.end());
     }
 
@@ -424,7 +318,7 @@ template<class T, class W> void ALL_Voronoi_LB<T,W>::balance()
                 con_old.compute_cell(c,cl_old);
                 volumes.at(idx) = c.volume();
             }
-            if (pid == local_rank)
+            if (pid == this->local_rank)
             {
                 con_old.compute_cell(c,cl_old);
                 local_volume = c.volume();
@@ -432,16 +326,16 @@ template<class T, class W> void ALL_Voronoi_LB<T,W>::balance()
         }
         while(cl_old.inc());
     }
-
-    MPI_Barrier(global_comm);
-    if(local_rank == 0) std::cout << "still sane?" << std::endl;
-
+#ifdef ALL_DEBUG_ENABLED
+    MPI_Barrier(this->global_comm);
+    if(this->local_rank == 0) std::cout << "ALL_Voronoi<T,W>::balance begin checking..." << std::endl;
+#endif
     /* 
     for (int i = 0; i < 25; ++i)
     {
-        if (local_rank == i)
+        if (this->local_rank == i)
         {
-            std::cout << local_rank << ": ";
+            std::cout << this->local_rank << ": ";
             for (auto n : neighbors)
                 std::cout << " " << n << " ";
             std::cout << "| " << neighbors.size() << std::endl;
@@ -449,7 +343,7 @@ template<class T, class W> void ALL_Voronoi_LB<T,W>::balance()
                 std::cout << " " << s << " ";
             std::cout << std::endl;
         }
-        MPI_Barrier(global_comm);
+        MPI_Barrier(this->global_comm);
     }
     */
 
@@ -465,6 +359,11 @@ template<class T, class W> void ALL_Voronoi_LB<T,W>::balance()
         work_load += generator_points.at(info_length * neighbor + info_length - 1);
     }
 
+#ifdef ALL_DEBUG_ENABLED
+    MPI_Barrier(this->global_comm);
+    if(this->local_rank == 0) std::cout << "ALL_Voronoi<T,W>::balance find neighbors..." << std::endl;
+#endif
+
     T max_diff = 20.0;
 
     for (auto it = neighbors.begin(); it != neighbors.end(); ++it)
@@ -477,14 +376,14 @@ template<class T, class W> void ALL_Voronoi_LB<T,W>::balance()
             diff.at(d) = generator_points.at(info_length * neighbor + dimension + d) - local_info.at(d);
             //diff.at(d) = generator_points.at(info_length * neighbor + d) - local_info.at(d);
 
-            if (diff.at(d) > 0.5 * (sys_size[2*d+1] - sys_size[2*d]))
+            if (diff.at(d) > 0.5 * (this->sys_size[2*d+1] - this->sys_size[2*d]))
             {
-                diff.at(d) -= (sys_size[2*d+1] - sys_size[2*d]);
+                diff.at(d) -= (this->sys_size[2*d+1] - this->sys_size[2*d]);
                 correct = false;
             }
-            else if (diff.at(d) < -0.5 * (sys_size[2*d+1] - sys_size[2*d]))
+            else if (diff.at(d) < -0.5 * (this->sys_size[2*d+1] - this->sys_size[2*d]))
             {
-                diff.at(d) += (sys_size[2*d+1] - sys_size[2*d]);
+                diff.at(d) += (this->sys_size[2*d+1] - this->sys_size[2*d]);
                 correct = false;
             }
 
@@ -517,19 +416,24 @@ template<class T, class W> void ALL_Voronoi_LB<T,W>::balance()
             for (int d = 0; d < dimension; ++d)
             {
                 //shift.at(d) += 0.5 * work_diff / work_density * surfaces.at(std::distance(neighbors.begin(),it)) * diff.at(d);
-                shift.at(d) += 0.5 * work_diff * diff.at(d) / gamma;
+                shift.at(d) += 0.5 * work_diff * diff.at(d) / this->gamma;
             }
         }
     }
 
+#ifdef ALL_DEBUG_ENABLED
+    MPI_Barrier(this->global_comm);
+    if(this->local_rank == 0) std::cout << "ALL_Voronoi<T,W>::balance after neighbors found..." << std::endl;
+#endif
+
     norm = sqrt ( shift.at(0) * shift.at(0) + shift.at(1) * shift.at(1) + shift.at(2) * shift.at(2) );
 
     /*
     for (int i = 0; i < 25; ++i)
     {
-        if (local_rank == i)
+        if (this->local_rank == i)
         {
-            std::cout << local_rank << ": ";
+            std::cout << this->local_rank << ": ";
             for (auto n : local_info)
                 std::cout << " " << n << " ";
             std::cout << " | ";
@@ -537,15 +441,20 @@ template<class T, class W> void ALL_Voronoi_LB<T,W>::balance()
                 std::cout << " " << n << " ";
             std::cout << std::endl;
         }
-        MPI_Barrier(global_comm);
+        MPI_Barrier(this->global_comm);
     }
     */
 
     T scale = 1.0;
 
-   if (norm > 0.45 * max_diff)
+    if (norm > 0.45 * max_diff)
         scale = 0.45 * max_diff / norm;
         
+#ifdef ALL_DEBUG_ENABLED
+    MPI_Barrier(this->global_comm);
+    if(this->local_rank == 0) std::cout << "ALL_Voronoi<T,W>::balance find new neighbors..." << std::endl;
+#endif
+
 
     // to find new neighbors
     for (int d = 0; d < dimension; ++d)
@@ -561,26 +470,31 @@ template<class T, class W> void ALL_Voronoi_LB<T,W>::balance()
                                     :local_info.at(d));
         */
         // periodic correction of points
-        local_info.at(d) = (local_info.at(d) < sys_size.at(2*d))
-                                ?sys_size.at(2*d)+1.0
-                                : ((local_info.at(d) >= sys_size.at(2*d+1))
-                                    ?sys_size.at(2*d+1)-1.0
+        local_info.at(d) = (local_info.at(d) < this->sys_size.at(2*d))
+                                ?this->sys_size.at(2*d)+1.0
+                                : ((local_info.at(d) >= this->sys_size.at(2*d+1))
+                                    ?this->sys_size.at(2*d+1)-1.0
                                     :local_info.at(d));
     }
 
     MPI_Allgather(local_info.data(), info_length, mpi_data_type_T, 
                   generator_points.data(), info_length, mpi_data_type_T,
-                  global_comm);
+                  this->global_comm);
+
+#ifdef ALL_DEBUG_ENABLED
+    MPI_Barrier(this->global_comm);
+    if(this->local_rank == 0) std::cout << "ALL_Voronoi<T,W>::balance create new containers..." << std::endl;
+#endif
 
     // create Voronoi-cells
     // TODO: check if system is periodic or not -> true / false!
     voro::container con_new(
-                sys_size.at(0),
-                sys_size.at(1),
-                sys_size.at(2),
-                sys_size.at(3),
-                sys_size.at(4),
-                sys_size.at(5),
+                this->sys_size.at(0),
+                this->sys_size.at(1),
+                this->sys_size.at(2),
+                this->sys_size.at(3),
+                this->sys_size.at(4),
+                this->sys_size.at(5),
                 ALL_VORONOI_SUBBLOCKS,
                 ALL_VORONOI_SUBBLOCKS,
                 ALL_VORONOI_SUBBLOCKS,
@@ -618,7 +532,7 @@ template<class T, class W> void ALL_Voronoi_LB<T,W>::balance()
 
     loadbalancing_step++;
 
-    if (local_rank == 0)
+    if (this->local_rank == 0)
     {
         con_new.draw_particles(ss_local_gp_gnu2.str().c_str());
         con_new.draw_cells_gnuplot(ss_local_vc_gnu2.str().c_str());
@@ -627,6 +541,11 @@ template<class T, class W> void ALL_Voronoi_LB<T,W>::balance()
     }
 */
 
+#ifdef ALL_DEBUG_ENABLED
+    MPI_Barrier(this->global_comm);
+    if(this->local_rank == 0) std::cout << "ALL_Voronoi<T,W>::balance storing shifted vertices..." << std::endl;
+#endif
+
     for (int d = 0; d < dimension; ++d)
        shifted_vertices[d] = local_info.at(d);
 
@@ -651,7 +570,7 @@ template<class T, class W> void ALL_Voronoi_LB<T,W>::balance()
                 cl_new.pos(pid,x,y,z,r);
                 if (i == 0)
                 {
-                    if (pid == local_rank)
+                    if (pid == this->local_rank)
                     {
                         con_new.compute_cell(c,cl_new);
                         c.neighbors(next_neighbors.at(idx));
@@ -680,7 +599,7 @@ template<class T, class W> void ALL_Voronoi_LB<T,W>::balance()
         std::sort(neighbors.begin(),neighbors.end());
         auto uniq_it = std::unique(neighbors.begin(), neighbors.end());
         neighbors.resize( std::distance(neighbors.begin(), uniq_it) );
-        neighbors.erase(std::remove(neighbors.begin(),neighbors.end(),local_rank),neighbors.end());
+        neighbors.erase(std::remove(neighbors.begin(),neighbors.end(),this->local_rank),neighbors.end());
         neighbors.erase(std::remove_if(neighbors.begin(),neighbors.end(),[](int x){return x < 0;}),neighbors.end());
     }
 
@@ -722,9 +641,9 @@ template<class T, class W> void ALL_Voronoi_LB<T,W>::balance()
 /*
     for (int i = 0; i < 25; ++i)
     {
-        if (local_rank == i)
+        if (this->local_rank == i)
         {
-            std::cout << local_rank << ": ";
+            std::cout << this->local_rank << ": ";
             for (auto n : neighbors)
                 std::cout << " " << n << " ";
             std::cout << "| " << neighbors.size() << std::endl;
@@ -735,7 +654,7 @@ template<class T, class W> void ALL_Voronoi_LB<T,W>::balance()
             }
             std::cout << std::endl;
         }
-        MPI_Barrier(global_comm);
+        MPI_Barrier(this->global_comm);
     }
 */
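The periodic correction above is the usual minimum-image convention applied to the vector between two generator points; the corrected vector is what the work difference acts on when shifting the local generator point (shift += 0.5 * work_diff * diff / gamma). A small stand-alone sketch of that correction (hypothetical helper, not the library API):

    #include <iostream>
    #include <vector>

    // Sketch: minimum-image corrected difference vector between a neighbour's
    // generator point and the local one, for a periodic box [lo, hi) per
    // direction, as used above before computing the work-driven shift.
    std::vector<double> min_image_diff(const std::vector<double>& local,
                                       const std::vector<double>& remote,
                                       const std::vector<double>& box_lo,
                                       const std::vector<double>& box_hi)
    {
        std::vector<double> diff(local.size());
        for (std::size_t d = 0; d < local.size(); ++d)
        {
            const double L = box_hi[d] - box_lo[d];
            diff[d] = remote[d] - local[d];
            if (diff[d] >  0.5 * L) diff[d] -= L;   // wrap across upper boundary
            if (diff[d] < -0.5 * L) diff[d] += L;   // wrap across lower boundary
        }
        return diff;
    }

    int main()
    {
        // neighbour on the far side of a 10x10x10 periodic box is actually close
        auto d = min_image_diff({1.0, 5.0, 5.0}, {9.5, 5.0, 5.0},
                                {0.0, 0.0, 0.0}, {10.0, 10.0, 10.0});
        std::cout << d[0] << " " << d[1] << " " << d[2] << "\n";   // -1.5 0 0
    }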
 
diff --git a/src/ALL_fortran.cpp b/src/ALL_fortran.cpp
index 7ab556dc02b5526768bdb94f4187558e05c70c45..80494ad151333e6e64166080ad9cca41d4b444d1 100644
--- a/src/ALL_fortran.cpp
+++ b/src/ALL_fortran.cpp
@@ -34,9 +34,9 @@ extern "C"
 {
     // wrapper to create a ALL<double,double> object (should be the
     // most commonly used one)
-    ALL<double,double>* ALL_init_f(const int dim, double gamma)
+    ALL<double,double>* ALL_init_f(ALL_LB_t method, const int dim, double gamma)
     {
-        return new ALL<double,double>(dim,gamma);
+        return new ALL<double,double>(method,dim,gamma);
     }
 
     // set grid parameters
@@ -74,15 +74,15 @@ extern "C"
     }
 
     // wrapper to setup routine
-    void ALL_setup_f(ALL<double,double>* all_obj, ALL_LB_t method)
+    void ALL_setup_f(ALL<double,double>* all_obj)
     {
-        all_obj->setup(method);
+        all_obj->setup();
     }
 
     // wrapper to call balance routine
-    void ALL_balance_f(ALL<double,double>* all_obj, ALL_LB_t method)
+    void ALL_balance_f(ALL<double,double>* all_obj)
     {
-        all_obj->balance(method);
+        all_obj->balance();
     }
 
     // wrapper to get number of new vertices
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index f223173decfc0aafeb13549d37660352ac0374f4..e94f5e6df07d104718b856ff60f05d2a1361d0e5 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -1,6 +1,7 @@
 set(ALL_INCLUDE_DIR ${CMAKE_SOURCE_DIR}/include)
-set(ALL_HEADER_FILES ${ALL_INCLUDE_DIR}/ALL.hpp 
-                     ${ALL_INCLUDE_DIR}/ALL_Staggered.hpp 
+set(ALL_HEADER_FILES ${ALL_INCLUDE_DIR}/ALL.hpp
+                     ${ALL_INCLUDE_DIR}/ALL_LB.hpp
+                     ${ALL_INCLUDE_DIR}/ALL_Staggered.hpp
                      ${ALL_INCLUDE_DIR}/ALL_Tensor.hpp
                      ${ALL_INCLUDE_DIR}/ALL_Unstructured.hpp
                      ${ALL_INCLUDE_DIR}/ALL_Voronoi.hpp