diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml
index b76b7ebdac07ff2b833d7791b35731943c5e9bf0..1673251e5d379cf649a00f5bac85ac210bb02d06 100644
--- a/.gitlab-ci.yml
+++ b/.gitlab-ci.yml
@@ -37,7 +37,7 @@ badges:
 
 build-mpich-nofortran-gcc8:
     stage: build_mpich_gcc8
-    image: gitlab.version.fz-juelich.de:5555/slms/loadbalancing/mpich-gcc8_mpi
+    image: gitlab.version.fz-juelich.de:5555/slms/loadbalancing/mpich-gcc8-full
     tags:
         - public-docker
     when: always
@@ -82,7 +82,7 @@ build-mpich-nofortran-gcc10:
 
 build-mpich-fortran-gcc8:
     stage: build_mpich_gcc8
-    image: gitlab.version.fz-juelich.de:5555/slms/loadbalancing/mpich-gcc8_mpi
+    image: gitlab.version.fz-juelich.de:5555/slms/loadbalancing/mpich-gcc8-full
     tags:
         - public-docker
     when: always
@@ -127,7 +127,7 @@ build-mpich-fortran-gcc10:
 
 build-mpich-fortran-f08-gcc8:
     stage: build_mpich_gcc8
-    image: gitlab.version.fz-juelich.de:5555/slms/loadbalancing/mpich-gcc8_mpi
+    image: gitlab.version.fz-juelich.de:5555/slms/loadbalancing/mpich-gcc8-full
     tags:
         - public-docker
     when: always
@@ -138,7 +138,7 @@ build-mpich-fortran-f08-gcc8:
         when: always
         paths:
             - badges/
-    allow_failure: false
+    allow_failure: true
 
 build-mpich-fortran-f08-gcc9:
     stage: build_mpich_gcc9
@@ -172,7 +172,7 @@ build-mpich-fortran-f08-gcc10:
     
 build-mpich-voronoi-gcc8:
     stage: build_mpich_gcc8
-    image: gitlab.version.fz-juelich.de:5555/slms/loadbalancing/mpich-gcc8_mpi
+    image: gitlab.version.fz-juelich.de:5555/slms/loadbalancing/mpich-gcc8-full
     tags:
         - public-docker
     when: always
@@ -397,12 +397,12 @@ build-openmpi-voronoi-gcc10:
 
 build-mpich-complete-gcc8:
     stage: build_mpich_gcc8
-    image: gitlab.version.fz-juelich.de:5555/slms/loadbalancing/mpich-gcc8_mpi-vtk-doxy2:latest
+    image: gitlab.version.fz-juelich.de:5555/slms/loadbalancing/mpich-gcc8-full:latest
     tags:
         - public-docker
     when: always
     script:
-        - ./ci/build_mpi_complete.sh
+        - ./ci/build_mpi_complete_gcc8.sh
     artifacts:
         name: pages
         when: always
diff --git a/CMakeLists.txt b/CMakeLists.txt
index cdb7913cb63520014b2c8941365b40a791b660f2..bb64ca4f07e0278f01dd69661c9e0aa7be222d55 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -1,10 +1,12 @@
-cmake_minimum_required (VERSION 3.10)
+cmake_minimum_required (VERSION 3.14)
 
 option(CM_ALL_VTK_OUTPUT "VTK output routine" OFF)
 option(CM_ALL_VORONOI "Voronoi-based loadbalancing scheme (requires Voro++)" OFF)
 option(CM_ALL_FORTRAN "VTK output routine" OFF)
 option(CM_ALL_DEBUG "Enable debug information" OFF)
 option(CM_ALL_USE_F08 "Use some Fortran 2008 features (mpi_f08)" OFF)
+option(CM_ALL_FORTRAN_ERROR_ABORT "Abort on any error when using the
+Fortran interface" OFF)
 option(CM_ALL_TESTS "Enables test suite" OFF)
 option(CM_ALL_AUTO_DOC "Enables creation of auto-documentation")
 
@@ -13,35 +15,52 @@ option(CM_ALL_AUTO_DOC "Enables creation of auto-documentation")
 cmake_policy(SET CMP0004 OLD)
 
 enable_language(CXX)
+enable_language(C)
+
+if (CM_ALL_USE_F08)
+    if (NOT CM_ALL_FORTRAN)
+        message(FATAL_ERROR "Flag for Fortran 08 MPI interface set, while Fortran interface is not enabled!")
+    endif ()
+endif ()
 
 if (CM_ALL_FORTRAN)
     enable_language(Fortran)
+# todo(s.schulz): This should be extended for other compilers, if they
+#                 support standard checking. The Intel Fortran compiler
+#                 does not.
+    if(CM_ALL_USE_F08)
+        add_compile_options($<$<AND:$<COMPILE_LANGUAGE:Fortran>,$<Fortran_COMPILER_ID:GNU>>:-std=f2008>)
+    else()
+        add_compile_options($<$<AND:$<COMPILE_LANGUAGE:Fortran>,$<Fortran_COMPILER_ID:GNU>>:-std=f2003>)
+    endif()
+    add_compile_options($<$<AND:$<COMPILE_LANGUAGE:Fortran>,$<Fortran_COMPILER_ID:GNU>>:-fbacktrace>)
+    add_compile_options($<$<AND:$<COMPILE_LANGUAGE:Fortran>,$<Fortran_COMPILER_ID:Intel>>:-traceback>)
 endif (CM_ALL_FORTRAN)
 
 # set standards compliance flags
 set(CMAKE_CXX_STANDARD 11)
 set(CMAKE_CXX_STANDARD_REQUIRED ON)
 set(CMAKE_CXX_EXTENSIONS OFF)
-# todo(s.schulz): This should be extended for other compilers, if they
-#                 support standard checking. The Intel Fortran compiler
-#                 does not.
-if(CM_ALL_USE_F08)
-    add_compile_options($<$<AND:$<COMPILE_LANGUAGE:Fortran>,$<Fortran_COMPILER_ID:GNU>>:-std=f2008>)
-else()
-    add_compile_options($<$<AND:$<COMPILE_LANGUAGE:Fortran>,$<Fortran_COMPILER_ID:GNU>>:-std=f2003>)
-endif()
-add_compile_options($<$<AND:$<COMPILE_LANGUAGE:Fortran>,$<Fortran_COMPILER_ID:GNU>>:-fbacktrace>)
-add_compile_options($<$<AND:$<COMPILE_LANGUAGE:Fortran>,$<Fortran_COMPILER_ID:Intel>>:-traceback>)
-add_compile_options("$<$<AND:$<CONFIG:Debug>,$<COMPILE_LANGUAGE:Fortran>,$<Fortran_COMPILER_ID:GNU>>:-Wall;-Wextra>")
+if (CM_ALL_FORTRAN)
+    add_compile_options("$<$<AND:$<CONFIG:Debug>,$<COMPILE_LANGUAGE:Fortran>,$<Fortran_COMPILER_ID:GNU>>:-Wall;-Wextra>")
+endif (CM_ALL_FORTRAN)
 add_compile_options("$<$<AND:$<CONFIG:Debug>,$<COMPILE_LANGUAGE:C>,$<C_COMPILER_ID:GNU>>:-Wall;-Wextra>")
 add_compile_options("$<$<AND:$<CONFIG:Debug>,$<COMPILE_LANGUAGE:CXX>,$<CXX_COMPILER_ID:GNU>>:-Wall;-Wextra>")
 
-project(ALL)
+project(ALL
+    VERSION "0.1.0"
+    DESCRIPTION "A Loadbalacing Library"
+    HOMEPAGE_URL "localhost")
 
 # add custom find-scripts to module path
 set(CMAKE_MODULE_PATH "${PROJECT_SOURCE_DIR}/cmake" ${CMAKE_MODULE_PATH})
 
-find_package(MPI REQUIRED)
+if (CM_ALL_FORTRAN)
+    find_package(MPI REQUIRED COMPONENTS CXX Fortran)
+else()
+    find_package(MPI REQUIRED COMPONENTS CXX)
+endif (CM_ALL_FORTRAN)
+
 
 if(CM_ALL_VTK_OUTPUT)
     message("Using VTK output")
@@ -65,6 +84,10 @@ if (CM_ALL_DEBUG)
     add_compile_options("-DALL_DEBUG_ENABLED")
 endif(CM_ALL_DEBUG)
 
+if(CM_ALL_FORTRAN_ERROR_ABORT)
+    add_compile_options("-DALL_FORTRAN_ERROR_ABORT")
+endif()
+
 if(CM_ALL_USE_F08 AND MPI_Fortran_HAVE_F08_MODULE)
     add_compile_options("-DALL_USE_F08")
 elseif(CM_ALL_USE_F08 AND NOT MPI_Fortran_HAVE_F08_MODULE)
diff --git a/README.md b/README.md
index 06ba3fef41465148a078c27696dd5fc48680f2da..002c9b12a67799397641df13ecde08e9737527e4 100644
--- a/README.md
+++ b/README.md
@@ -69,7 +69,7 @@ Installation & Requirements:
     Base requirements:
         C++11
         MPI
-        CMake 3.10+
+        CMake 3.14+
 
     Optional requirements:
         Fortran 2003
diff --git a/README_new.md b/README_new.md
new file mode 100644
index 0000000000000000000000000000000000000000..89cc34cba214f06688765650d077b67ee6e1d93f
--- /dev/null
+++ b/README_new.md
@@ -0,0 +1,422 @@
+# A Load Balancing Library (ALL)
+
+The library aims to provide an easy way to include dynamic domain-based
+load balancing into particle based simulation codes. The library is
+developed in the Simulation Laboratory Molecular Systems of the Jülich
+Supercomputing Centre at Forschungszentrum Jülich. 
+
+It includes several load balancing schemes. The following list gives and
+overview about these schemes and short explanations.
+
+ - **Tensor product**: The work on all processes is reduced over the
+   cartesian planes in the systems. This work is then equalized by
+   adjusting the borders of the cartesian planes.
+ - **Staggered grid**: A 3-step hierarchical approach is applied, where:
+   1. work over the cartesian planes is reduced, before the borders of
+      these planes are adjusted
+   2. in each of the cartesian planes the work is reduced for each
+      cartesian column. These columns are then adjusted to each other to
+      homogenize the work in each column
+   3. the work between neighboring domains in each column is adjusted.
+      Each adjustment is done locally with the neighboring planes,
+      columns or domains by adjusting the adjacent boundaries.
+ - **Topological mesh** (WIP): In contrast to the previous methods this
+   method adjusts domains not by moving boundaries but vertices, i.e.
+   corner points, of domains. For each vertex a force, based on the
+   differences in work of the neighboring domains, is computed and the
+   vertex is shifted in a way to equalize the work between these
+   neighboring domains.
+ - **Voronoi mesh** (WIP): Similar to the topological mesh method, this
+   method computes a force, based on work differences. In contrast to
+   the topological mesh method, the force acts on a Voronoi point rather
+   than a vertex, i.e. a point defining a Voronoi cell, which describes
+   the domain. Consequently, the number of neighbors is not a conserved
+   quantity, i.e. the topology may change over time. ALL uses the Voro++
+   library published by the Lawrence Berkeley Laboratory for the
+   generation of the Voronoi mesh.
+ - **Histogram based staggered grid**: Resulting in the same grid, as
+   the staggered grid scheme, this scheme uses the cumulative work
+   function in each of the three cartesian directions in order to
+   generate this grid. Using histograms and the previously defined
+   distribution of process domains in a cartesian grid, this scheme
+   generates in three steps a staggered-grid result, in which the work
+   is distributed as evenly as the resolution of the underlying
+   histogram allows. In contrast to the previously mentioned schemes
+   this scheme depends on a global exchange of work between processes.
+ - **Orthogonal recursive bisection** (Planned): Comparable to the
+   histogram based staggered grid scheme, this scheme uses cumulative
+   work functions evaluated by histograms in order to find a new
+   distribution of workload in a hierarchical manner. In contrast to the
+   histogram based staggered grid scheme, the subdivision of domains is
+   not based on a cartesian division of processes, but on the prime
+   factorization of the total number of processes. During each step, the
+   current slice of the system is distributed into smaller sub-slices
+   along the longest edge, that roughly contain the same workload. The
+   number of sub-slices in then determined by the corresponding prime
+   number of that step, i.e. when trying to establish a distribution for
+   24 (3 * 2 * 2 * 2) processes, the bisection would use four steps. In
+   the first step the system would be subdivided into 3 sub domains
+   along the largest edge, each containing roughly the same workload.
+   Each of these sub domains then is divided into two sub sub domains
+   independently. This procedure is then repeated for all the prime
+   numbers in the prime factorization.
+
+
+## Installation and Requirements
+
+### Requirements
+Base requirements:
+
+ - C++11 capable compiler
+ - MPI support
+ - CMake v. 3.14 or higher
+
+Optional requirements:
+
+ - Fortran 2003 capable compiler (Fortran interface)
+ - Fortran 2008 capable compiler (Usage of the `mpi_f08` interface)
+ - VTK 7.1 or higher (Domain output)
+ - Boost testing utilities
+ - Doxygen and Sphinx with `breathe` (Documentation)
+
+### Installation
+
+ 1. Clone the library from
+    `https://gitlab.version.fz-juelich.de/SLMS/loadbalancing` into
+    `$ALL_ROOT_DIR`.
+ 2. Create the build directory `$ALL_BUILD_DIR` some place else.
+ 3. Call `cmake -S "$ALL_ROOT_DIR" -B "$ALL_BUILD_DIR"` to set up the
+    installation. Compile time features can be selected from:
+     - `-DCMAKE_INSTALL_PREFIX=$ALL_INSTALL_DIR` (default: system
+       dependant): Install into `$ALL_INSTALL_DIR` after compiling.
+     - `-DCM_ALL_VTK_OUTPUT=ON` (default: `OFF`): Enable VTK output
+       of domains. Requires VTK 7.1 or higher.
+     - `-DCM_ALL_FORTRAN=ON` (default: `OFF`): Enable Fortran interface.
+       Requires Fortran 2003 capable compiler.
+     - `-DCM_ALL_USE_F08=ON` (default: `OFF`): Enable usage of `mpi_f08`
+       module for MPI. Requires Fortran 2008 capable compiler and
+       compatible MPI installation.
+     - `-DCM_ALL_FORTRAN_ERROR_ABORT=ON` (default: `OFF): Abort
+       execution on any error when using the Fortran interface instead
+       of setting the error number and leaving error handling to the
+       user.
+     - `-DCM_ALL_VORONOI=ON` (default: `OFF`): Enable Voronoi mesh
+       method and subsequently compilation and linkage of Voro++.
+     - `-DCM_ALL_TESTING=ON` (default: `OFF`): Turns on unit and feature
+       tests. Can be run from the build directory with `ctest`. Requires
+       the Boost test utilities.
+     - `-DCMAKE_BUILD_TYPE=Debug` (default: `Release`): Enable library
+       internal debugging features. Using `DebugWithOpt` also turns on
+       some optimizations.
+     - `-DCM_ALL_DEBUG=ON` (default: `OFF`): Enable self consistency
+       checks within the library itself.
+     - `-DVTK_DIR=$VTK_DIR` (default: system dependant): Path to an
+       explicit VTK installation. Make sure that VTK is compiled with
+       the same MPI as the library and subsequently your code.
+    To use a specific compiler and Boost installation use: `CC=gcc
+    CXX=g++ BOOST_ROOT=$BOOST_DIR cmake [...]`.
+  4. To build and install the library then run: `cmake --build
+     "$ALL_BUILD_DIR"` and `cmake --install "$ALL_BUILD_DIR"`.
+     Afterwards, the built examples and library files are placed in
+     `$ALL_INSTALL_DIR`.
+
+
+## Usage
+The library is presented as a single load balancing object `ALL::ALL`
+from `ALL.hpp`. All classes are encapsulated in the `ALL` name space.
+Simple (and not so simple) examples are available in the `/examples` sub
+directory. The simple examples are also documented at length.
+
+Errors are treated as exceptions that are thrown of a (sub) class of
+`ALL::CustomException`. Likely candidates that may throw are `balance`,
+`setup`, `printVTKoutlines`.
+
+### ALL object
+The ALL object can be constructed with
+
+    ALL::ALL<T,W> (
+                   const ALL::LB_t method,
+                   const int dimension,
+                   const T gamma)
+
+Where the type of the boundaries is given by `T` and the type of the
+work as `W`, which are usually float or double. The load balancing
+method must be one of `ALL::LB_t::TENSOR`, `...STAGGERED`,
+`...FORCEBASED`, `...VORONOI`, `...HISTOGRAM`. Where the Voronoi
+method must be enabled at compile time. There is also a second form
+where the initial domain vertices are given:
+
+    ALL::ALL<T,W> (
+                   const ALL::LB_t method,
+                   const int dimension,
+                   const std::vector<ALL::Point<T>> &initialDomain,
+                   const T gamma)
+
+### Point object
+The domain boundaries are given and retrieved as a vector of
+`ALL::Point`s. These can be constructed via
+
+    ALL::Point<T> ( const int dimension )
+    ALL::Point<T> ( const int dimension, const T *values )
+    ALL::Point<T> ( const std::vector<T> &values )
+
+and accessed via the `[]` operator like a `std::vector`
+
+### Set up
+A few additional parameters must be set before the domains can be
+balanced. Exactly which are dependent on the method used, but in general
+the following are necessary
+
+    void ALL::ALL<T,W>::setVertices ( std::vector<ALL::Point<T>> &vertices )
+    void ALL::ALL<T,W>::setCommunicator ( MPI_Comm comm )
+    void ALL::ALL<T,W>::setWork ( const W work )
+
+If the given communicator is not a cartesian communicator the process
+grid parameters must also be set beforehand (!) using
+
+    void ALL::ALL<T,W>::setProcGridParams (
+            const std::vector<int> &location,
+            const std::vector<int> &size)
+
+with the location of the current process and number of processes in each
+direction.
+
+To trace the current domain better an integer tag can be given, that is
+used in the domain output, with
+
+    void ALL::ALL<T,W>::setProcTag ( int tag )
+
+and the minimal domain size in each direction can be set using
+
+    void ALL::ALL<T,W>::setMinDomainSize (
+            const std::vector<T> &minSize )
+ 
+Once these parameters are set call
+
+    void ALL::ALL<T,W>::setup()
+
+so internal set up routines can run.
+
+### Balancing
+To create the new domain vertices make sure the current vertices are set
+using `setVertices` and the work is set using `setWork` then call
+
+    void ALL::ALL<T,W>::balance()
+
+and then retrieve the new vertices using
+
+    std::vector<ALL::Point<T>> &ALL::ALL<T,W>::getVertices ()
+
+or new neighbors with
+
+    std::vector<int> &ALL::ALL<T,W>::getNeighbors ()
+
+### Special considerations for the Fortran interface
+The Fortran interface exists in two versions. Either in the form of
+
+    ALL_balance(all_object)
+
+where the all object of type `type(ALL_t)` is given as first argument to
+each function and as more object oriented functions to the object
+itself. So the alternative call would be
+
+    all_object%balance()
+
+The function names themselves are similar, i.e. instead of camel case
+they are all lowercase except the first `ALL_` with underscores between
+words. So `setProcGridParams` becomes `ALL_set_proc_grid_parms`. For
+more usage info please check the Fortran example programs.
+
+One important difference is the handling of MPI types, which change
+between the MPI Fortran 2008 interface as given by the `mpi_f08` module
+and previous interfaces. In previous interfaces all MPI types are
+`INTEGER`s, but using the `mpi_f08` module the communicator is of
+`type(MPI_Comm)`. To allow using `ALL_set_communicator` directly with
+the communicator used in the user's application, build the library with
+enabled Fortran 2008 features and this communicator type is expected.
+
+Retrieving information from the balancer is also different, since most
+getter return (a reference to) an object itself. The Fortran subroutines
+set the values of its arguments. As an example
+
+    int work = all.getWork();
+
+becomes
+
+    integer(c_int) :: work
+    call ALL_get_work(work) !or
+    !call all%get_work(work)
+
+Since there is no exception handling in Fortran, the error handling from
+the libc is borrowed. So there are additional function that retrieve and
+reset an error number as well as an additional error message describing
+the error. These must be queried and handled by the user. An example of
+this usage is shown in the `ALL_Staggered_f` example when printing the
+VTK outlines.
+
+### Details of the balancing methods
+#### Tensor based
+In order to equalize the load of individual domains, the assumption is
+made that this can be achieved by equalizing the work in each cartesian
+direction, i.e. the work of all domains having the same coordinate in a
+cartesian direction is collected and the width of all these domains in
+this direction is adjusted by comparing this collective work with the
+collective work of the neighboring domains. This is done independently
+for each cartesian direction in the system.
+
+Required number of vertices:
+ - two, one describing the lower left front point and one describing the
+   upper right back point of the domain
+
+Advantages:
+ - topology of the system is maintained (orthogonal domains, neighbor
+   relations)
+ - no update for the neighbor relations need to be made in the calling
+   code
+ - if a code was able to deal with orthogonal domains, only small
+   changes are expected to include this strategy
+
+Disadvantages:
+ - due to the comparison of collective work loads in cartesian layers
+   and restrictions resulting from this construction, the final result
+   might lead to a sub-optimal domain distribution
+
+#### Staggered grid
+The staggered grid approach is a hierarchical one. In a first step the
+work of all domains sharing the same cartesian coordinate with respect
+to the highest dimension (z in three dimensions) is collected. Then,
+like in the TENSOR strategy the layer width in this dimension is
+adjusted based on comparison of the collected work with the collected
+work of the neighboring domains in the same dimension. As a second step
+each of these planes is divided into a set of columns, where all domains
+share the same cartesian coordinate in the next lower dimension (y in
+three dimensions). For each of these columns the before described
+procedure is repeated, i.e. work collected and the width of the columns
+adjusted accordingly. Finally, in the last step the work of individual
+domains is compared to direct neighbors and the width in the last
+dimension (x in three dimensions) adjusted. This leads to a staggered
+grid, that is much better suited to describe inhomogeneous work
+distributions than the TENSOR strategy.
+
+Required number of vertices:
+ - two, one describing the lower left front point and one describing the
+   upper right back point of the domain
+
+Advantages:
+ - very good equalization results for the work load
+ - maintains orthogonal domains
+
+Disadvantages:
+ - changes topology of the domains and requires adjustment of neighbor
+   relations
+ - communication pattern in the calling code might require adjustment to
+   deal with changing neighbors
+
+#### Histogram based staggered grid
+
+The histogram-based method works differently than the first two methods
+in such a way, that it is a global method that requires three distinct
+steps for a single adjustment. In each of these steps the following
+takes place: a partial histogram needs to be created over the workload,
+e.g. number of particles, along one direction on each domain, then these
+are supplied to the method, where a global histogram is computed. With
+this global histogram a distribution function is created. This is used
+to compute the optimal (possible) width of domains in that direction.
+For the second and third steps the computation of the global histograms
+and distribution functions take place in subsystems, being the results
+of the previous step.  The result is the most optimal distribution of
+domains according to the Staggered-grid method, at the cost of global
+exchange of work, due to the global adjustment, which makes the method
+not well suited to highly dynamic systems, due to the need of frequent
+updates. On the other hand the method is well suited for static
+problems, e.g. grid-based simulations.
+
+Note: Currently the order of dimensions is: z-y-x.
+
+Required number of vertices:
+ - two, one describing the lower left front point and one describing the
+   upper right back point of the domain
+
+Additional requirements:
+ - partial histogram created over the workload on the local domain in
+   the direction of the current correction step
+
+Advantages:
+ - supplies an optimal distribution of domains (restricted by width of
+   bins used for the histogram)
+ - only three steps needed to acquire result
+
+Disadvantages:
+ - expensive in cost of communication, due to global shifts
+ - requires preparation of histogram and shift of work between each of
+   the three correction steps
+
+
+## Example programs
+In the `/examples` sub directory several C++ and Fortran example
+programs are placed.
+
+### `ALL_test`
+MPI C++ code that generates a particle distribution over the domains. At
+program start the domains form an orthogonal decomposition of the cubic
+3d system. Each domain has the same volume as each other domain.
+Depending on the cartesian coordinates of the domain in this
+decomposition, a number of points is created on each domain. The points
+are then distributed uniformly over the domain. For the number of points
+a polynomial formula is used to create a non-uniform point distribution.
+As an estimation of the work load in this program the number of points
+within a domain was chosen. There is no particle movement included in
+the current version of the example code, therefore particle
+communication only takes place due to the shift of domain borders.
+
+The program creates three output files in its basic version:
+
+ - `minmax.dat`:
+    - **Columns**: `<iteration count> <W_min/W_avg> <W_max_W_avg>
+      <(W_max-W_min)/(W_max+W_min)>`
+    - **Explanation**: In order to try to give a comparable metric for
+      the success of the load balancing procedure the relatative
+      difference of the minimum and maximum work loads in the system in
+      relation to the average work load of the system are given. The
+      last column gives an indicator for the inequality of work in the
+      system, in a perfectly balanced system, this value should be equal
+      to zero.
+    
+ - `stddev.txt`:
+    - **Columns**: `<iteration count> <std_dev>`
+    - **Explanation**: The standard deviation of work load over all
+      domains.
+
+ - `domain_data.dat`:
+    - **Columns**: `<rank> <x_min> <x_max> <y_min> <y_max> <z_min>
+      <z_max> <W>`
+    - **Explanation**: There are two blocks of rows, looking like this,
+      the first block is the starting configuration of the domains,
+      which should be a uniform grid of orthogonal domains. The second
+      block is the configuration after 200 load balancing steps. In each
+      line the MPI rank of the domain and the extension in each
+      cartesian direction is given, as well as the work load (the number
+      of points).
+
+If the library is compiled with VTK support, `ALL_test` also creates a VTK
+based output of the domains in each iteration step. This output will be
+placed in a `vtk_outline` sub directory, which is created if it does not
+already exist. The resulting output can be visualized with tools like
+ParaView or VisIt.
+
+### `ALL_test_f` and `ALL_test_f_obj`
+The Fortran example provides a more basic version of the test program
+`ALL_test`, as its main goal is to show the functionality of the Fortran
+interface. The code creates a basic uniform orthogonal domain
+decomposition and creates an inhomogeneous particle distribution over
+these. Only one load balancing step is executed and the program prints
+out the domain distribution of the start configuration and of the final
+configuration.
+
+### `ALL_Staggered` and `ALL_Staggered_f`
+These create a very simple load balancing situation with fixed load and
+domains placed along the z direction. The C++ and Fortran versions are
+functionally the same, so the differences between the interfaces can be
+checked.
+
+<!-- vim: set tw=72 et ts=4 spell spelllang=en_us ft=markdown: -->
diff --git a/ci/build_mpi_complete_gcc8.sh b/ci/build_mpi_complete_gcc8.sh
new file mode 100755
index 0000000000000000000000000000000000000000..4e6df36807e58afd7eb41c2180e77aacedc2afca
--- /dev/null
+++ b/ci/build_mpi_complete_gcc8.sh
@@ -0,0 +1,30 @@
+#!/bin/bash
+source $(cd "$(dirname "$0")"; pwd -P)/ci_funcs.sh
+
+# create badge
+create_badge "${BADGE_FILENAME}" build-mpi unknown "--color=#808080"
+[[ ${BADGE_ONLY} ]] && pushbadge_exit "${BADGE_FILENAME}" 0
+
+# load MPI environment
+load_MPI
+
+# build
+mkdir -p build && cd build
+
+if [[ $? == 0 ]]; then
+
+    #CC=/usr/lib64/mpi/gcc/openmpi3/bin/mpicc CXX=/usr/lib64/mpi/gcc/openmpi3/bin/mpicxx FC=/usr/lib64/mpi/gcc/openmpi3/bin/mpif90 ${CMAKE} ..
+    ln -s /usr/bin/vtk7 /usr/bin/vtk
+    ln -s /usr/bin/vtk7 /usr/bin/pvtk
+    ${CMAKE} .. -DCM_ALL_FORTRAN=ON -DCM_ALL_VORONOI=ON -DCM_ALL_VTK_OUTPUT=ON -DCM_ALL_TESTS=ON -DCM_ALL_AUTO_DOC=ON -DVTK_DIR=/usr/local/lib/cmake/vtk-8.2 
+    make VERBOSE=1
+    make test
+
+    if [[ $? == 0 ]]; then
+        create_badge "${BADGE_FILENAME}" build-mpi passed --color=green
+        pushbadge_exit "${BADGE_FILENAME}" 0
+    fi
+fi
+
+create_badge "${BADGE_FILENAME}" build-mpi failed --color=red
+pushbadge_exit "${BADGE_FILENAME}" 1
diff --git a/docs/CMakeLists.txt b/docs/CMakeLists.txt
index 3f583b73e1dbb460cf3d7d52ef50c696c85280a4..8139cbcd42eeb43c3507b98d0e832a0b2df73a84 100644
--- a/docs/CMakeLists.txt
+++ b/docs/CMakeLists.txt
@@ -1,24 +1,47 @@
 message("Generating ALL Documentation:")
 find_package(Doxygen REQUIRED)
+find_package(Python3 REQUIRED)
+message(DEBUG "Python3 version: ${PYTHON3_VERSION}")
 find_package(Sphinx REQUIRED)
+#find breathe
+# todo(s.schulz): The following does not seem to work. It returns non zero even
+# though the package is installed and the correct Python interpreter is found.
+# The python interpreter is in a non standard path and included using
+# environment variables and the module was installed in the users directory
+# (pip3 install --user breathe).
+#execute_process(
+#	COMMAND Python3::Interpreter -c "import breathe"
+#	RESULT_VARIABLE BREATHE_NOT_FOUND
+#	OUTPUT_QUIET
+#	)
+#if(NOT ${BREATHE_NOT_FOUND} EQUAL 0)
+#	message( FATAL_ERROR "Python \"breathe\" package not found")
+#endif()
+# todo(s.schulz): Also find recommonmark
+#do we need to make sure sphinx_rtd_theme is installed?
 
 # Find all public headers
 get_target_property(ALL_PUBLIC_HEADER_DIR ALL INTERFACE_INCLUDE_DIRECTORIES)
 file(GLOB_RECURSE ALL_PUBLIC_HEADERS ${ALL_PUBLIC_HEADER_DIR})
 
 # setup configuring of doxygen
-set(DOXYGEN_INPUT_DIR ${PROJECT_SOURCE_DIR}/include)
+# Caveat(s.schulz): Make sure there are not spaces within the paths.
+set(DOXYGEN_INPUT_DIR ${PROJECT_SOURCE_DIR}/include ${PROJECT_SOURCE_DIR}/src ${PROJECT_SOURCE_DIR}/example)
+string(REPLACE ";" "\" \"" DOXYGEN_INPUT_DIR "${DOXYGEN_INPUT_DIR}")
 set(DOXYGEN_OUTPUT_DIR ${CMAKE_CURRENT_BINARY_DIR}/doc/doxygen)
 set(DOXYGEN_INDEX_FILE ${DOXYGEN_OUTPUT_DIR}/xml/index.xml)
 set(DOXYFILE_IN ${CMAKE_CURRENT_SOURCE_DIR}/Doxyfile.in)
 set(DOXYFILE_OUT ${CMAKE_CURRENT_BINARY_DIR}/Doxyfile)
-
+set(DOXYGEN_INTERNAL_DOCS NO)
+set(DOXYGEN_EXAMPLE_PATH ${PROJECT_SOURCE_DIR}/example)
+set(DOXYGEN_USE_MDFILE_AS_MAINPAGE ${PROJECT_SOURCE_DIR}/README.md)
 # configure doxygen config file
 configure_file(${DOXYFILE_IN} ${DOXYFILE_OUT} @ONLY)
 
 # create doxygen output directory
 file(MAKE_DIRECTORY ${DOXYGEN_OUTPUT_DIR})
 
+# todo(s.schulz): Add dependency on other files (Fortran module etc.)
 add_custom_command( 
     OUTPUT ${DOXYGEN_INDEX_FILE}
     DEPENDS ${ALL_PUBLIC_HEADERS} ${ALL_EXE_NAME}
@@ -34,17 +57,23 @@ set(SPHINX_SOURCE ${CMAKE_CURRENT_SOURCE_DIR})
 set(SPHINX_BUILD ${CMAKE_CURRENT_BINARY_DIR}/sphinx)
 set(SPHINX_INDEX_FILE ${SPHINX_BUILD}/index.html)
 
+set(SPHINX_IN ${CMAKE_CURRENT_SOURCE_DIR}/conf.py.in)
+set(SPHINX_OUT ${CMAKE_CURRENT_BINARY_DIR}/conf.py)
+set(SPHINX_BREATHE_PROJECT_PATH ${DOXYGEN_OUTPUT_DIR}/xml)
+configure_file(${SPHINX_IN} ${SPHINX_OUT} @ONLY)
+
 
+# todo(s.schulz): Add dependencies to all .rst files
 add_custom_command(OUTPUT ${SPHINX_INDEX_FILE}
                   COMMAND
                   ${SPHINX_EXECUTABLE} -b html
-                  -Dbreathe_projects.ALL=${DOXYGEN_OUTPUT_DIR}/xml
+                  -c "${CMAKE_CURRENT_BINARY_DIR}"
                   ${SPHINX_SOURCE} ${SPHINX_BUILD}
                   WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}
                   DEPENDS 
                   ${CMAKE_CURRENT_SOURCE_DIR}/index.rst
                   ${DOXYGEN_INDEX_FILE}
-                  MAIN_DEPENDENCY ${SPHINX_SOURCE}/conf.py
+                  MAIN_DEPENDENCY ${SPHINX_SOURCE}/conf.py.in
                   COMMENT "Generating ALL Docs (Sphinx)"
                   )
 
@@ -52,4 +81,6 @@ add_custom_target(Sphinx ALL DEPENDS ${SPHINX_INDEX_FILE})
 
 include(GNUInstallDirs)
 install(DIRECTORY ${SPHINX_BUILD}
-DESTINATION ${CMAKE_INSTALL_DOCDIR})
\ No newline at end of file
+        DESTINATION ${CMAKE_INSTALL_DOCDIR})
+install(DIRECTORY ${DOXYGEN_OUTPUT_DIR}/html
+        DESTINATION ${CMAKE_INSTALL_DOCDIR})
diff --git a/docs/Doxyfile.in b/docs/Doxyfile.in
index cfdc3156d16da7982db444c9794a11a1a9944f9a..5062818642a330954fef6143f043d41c8f3935a7 100644
--- a/docs/Doxyfile.in
+++ b/docs/Doxyfile.in
@@ -32,19 +32,19 @@ DOXYFILE_ENCODING      = UTF-8
 # title of most generated pages and in a few other places.
 # The default value is: My Project.
 
-PROJECT_NAME           = "MPCD"
+PROJECT_NAME           = "@PROJECT_NAME@"
 
 # The PROJECT_NUMBER tag can be used to enter a project or revision number. This
 # could be handy for archiving the generated documentation or if some version
 # control system is used.
 
-PROJECT_NUMBER         =
+PROJECT_NUMBER         = "@PROJECT_VERSION_MAJOR@.@PROJECT_VERSION_MINOR@.@PROJECT_VERSION_PATCH@"
 
 # Using the PROJECT_BRIEF tag one can provide an optional one line description
 # for a project that appears at the top of each page and should give viewer a
 # quick idea about the purpose of the project. Keep the description short.
 
-PROJECT_BRIEF          =
+PROJECT_BRIEF          = "@PROJECT_DESCRIPTION@"
 
 # With the PROJECT_LOGO tag one can specify a logo or an icon that is included
 # in the documentation. The maximum height of the logo should not exceed 55
@@ -226,7 +226,7 @@ SEPARATE_MEMBER_PAGES  = NO
 # uses this value to replace tabs by spaces in code fragments.
 # Minimum value: 1, maximum value: 16, default value: 4.
 
-TAB_SIZE               = 4
+TAB_SIZE               = 2
 
 # This tag can be used to specify a number of aliases that act as commands in
 # the documentation. An alias has the form:
@@ -328,7 +328,7 @@ AUTOLINK_SUPPORT       = YES
 # diagrams that involve STL classes more complete and accurate.
 # The default value is: NO.
 
-BUILTIN_STL_SUPPORT    = NO
+BUILTIN_STL_SUPPORT    = YES
 
 # If you use Microsoft's C++/CLI language, you should set this option to YES to
 # enable parsing support.
@@ -397,7 +397,7 @@ INLINE_GROUPED_CLASSES = NO
 # Man pages) or section (for LaTeX and RTF).
 # The default value is: NO.
 
-INLINE_SIMPLE_STRUCTS  = NO
+INLINE_SIMPLE_STRUCTS  = YES
 
 # When TYPEDEF_HIDES_STRUCT tag is enabled, a typedef of a struct, union, or
 # enum is documented as struct, union, or enum with the name of the typedef. So
@@ -435,7 +435,7 @@ LOOKUP_CACHE_SIZE      = 0
 # normally produced when WARNINGS is set to YES.
 # The default value is: NO.
 
-EXTRACT_ALL            = NO
+EXTRACT_ALL            = YES
 
 # If the EXTRACT_PRIVATE tag is set to YES, all private members of a class will
 # be included in the documentation.
@@ -515,7 +515,7 @@ HIDE_IN_BODY_DOCS      = NO
 # will be excluded. Set it to YES to include the internal documentation.
 # The default value is: NO.
 
-INTERNAL_DOCS          = NO
+INTERNAL_DOCS          = @DOXYGEN_INTERNAL_DOCS@
 
 # If the CASE_SENSE_NAMES tag is set to NO then doxygen will only generate file
 # names in lower-case letters. If set to YES, upper-case letters are also
@@ -578,7 +578,7 @@ SORT_MEMBER_DOCS       = YES
 # this will also influence the order of the classes in the class list.
 # The default value is: NO.
 
-SORT_BRIEF_DOCS        = NO
+SORT_BRIEF_DOCS        = YES
 
 # If the SORT_MEMBERS_CTORS_1ST tag is set to YES then doxygen will sort the
 # (brief and detailed) documentation of class members so that constructors and
@@ -590,7 +590,7 @@ SORT_BRIEF_DOCS        = NO
 # detailed member documentation.
 # The default value is: NO.
 
-SORT_MEMBERS_CTORS_1ST = NO
+SORT_MEMBERS_CTORS_1ST = YES
 
 # If the SORT_GROUP_NAMES tag is set to YES then doxygen will sort the hierarchy
 # of group names into alphabetical order. If set to NO the group names will
@@ -623,7 +623,7 @@ STRICT_PROTO_MATCHING  = NO
 # list. This list is created by putting \todo commands in the documentation.
 # The default value is: YES.
 
-GENERATE_TODOLIST      = YES
+GENERATE_TODOLIST      = NO
 
 # The GENERATE_TESTLIST tag can be used to enable (YES) or disable (NO) the test
 # list. This list is created by putting \test commands in the documentation.
@@ -820,45 +820,24 @@ FILE_PATTERNS          = *.c \
                          *.cxx \
                          *.cpp \
                          *.c++ \
-                         *.java \
-                         *.ii \
-                         *.ixx \
-                         *.ipp \
-                         *.i++ \
-                         *.inl \
-                         *.idl \
-                         *.ddl \
-                         *.odl \
                          *.h \
                          *.hh \
                          *.hxx \
                          *.hpp \
                          *.h++ \
-                         *.cs \
-                         *.d \
-                         *.php \
-                         *.php4 \
-                         *.php5 \
-                         *.phtml \
                          *.inc \
-                         *.m \
                          *.markdown \
                          *.md \
-                         *.mm \
                          *.dox \
                          *.py \
                          *.pyw \
                          *.f90 \
+                         *.F90 \
                          *.f95 \
                          *.f03 \
                          *.f08 \
                          *.f \
-                         *.for \
-                         *.tcl \
-                         *.vhd \
-                         *.vhdl \
-                         *.ucf \
-                         *.qsf
+                         *.for
 
 # The RECURSIVE tag can be used to specify whether or not subdirectories should
 # be searched for input files as well.
@@ -906,7 +885,7 @@ EXCLUDE_SYMBOLS        =
 # that contain example code fragments that are included (see the \include
 # command).
 
-EXAMPLE_PATH           =
+EXAMPLE_PATH           = "@DOXYGEN_EXAMPLE_PATH@"
 
 # If the value of the EXAMPLE_PATH tag contains directories, you can use the
 # EXAMPLE_PATTERNS tag to specify one or more wildcard pattern (like *.cpp and
@@ -982,7 +961,7 @@ FILTER_SOURCE_PATTERNS =
 # (index.html). This can be useful if you have a project on for instance GitHub
 # and want to reuse the introduction page also for the doxygen output.
 
-USE_MDFILE_AS_MAINPAGE =
+USE_MDFILE_AS_MAINPAGE = "@DOXYGEN_USE_MDFILE_AS_MAINPAGE@"
 
 #---------------------------------------------------------------------------
 # Configuration options related to source browsing
@@ -995,7 +974,7 @@ USE_MDFILE_AS_MAINPAGE =
 # also VERBATIM_HEADERS is set to NO.
 # The default value is: NO.
 
-SOURCE_BROWSER         = NO
+SOURCE_BROWSER         = YES
 
 # Setting the INLINE_SOURCES tag to YES will include the body of functions,
 # classes and enums directly into the documentation.
@@ -1249,7 +1228,7 @@ HTML_TIMESTAMP         = NO
 # The default value is: NO.
 # This tag requires that the tag GENERATE_HTML is set to YES.
 
-HTML_DYNAMIC_SECTIONS  = NO
+HTML_DYNAMIC_SECTIONS  = YES
 
 # With HTML_INDEX_NUM_ENTRIES one can control the preferred number of entries
 # shown in the various tree structured indices initially; the user can expand
@@ -1478,7 +1457,7 @@ DISABLE_INDEX          = NO
 # The default value is: NO.
 # This tag requires that the tag GENERATE_HTML is set to YES.
 
-GENERATE_TREEVIEW      = NO
+GENERATE_TREEVIEW      = YES
 
 # The ENUM_VALUES_PER_LINE tag can be used to set the number of enum values that
 # doxygen will group on one line in the generated HTML documentation.
@@ -1666,7 +1645,7 @@ EXTRA_SEARCH_MAPPINGS  =
 # If the GENERATE_LATEX tag is set to YES, doxygen will generate LaTeX output.
 # The default value is: YES.
 
-GENERATE_LATEX         = YES
+GENERATE_LATEX         = NO
 
 # The LATEX_OUTPUT tag is used to specify where the LaTeX docs will be put. If a
 # relative path is entered the value of OUTPUT_DIRECTORY will be put in front of
@@ -2099,7 +2078,7 @@ INCLUDE_FILE_PATTERNS  =
 # recursively expanded use the := operator instead of the = operator.
 # This tag requires that the tag ENABLE_PREPROCESSING is set to YES.
 
-PREDEFINED             =
+PREDEFINED             = ALL_VORONOI_ACTIVE ALL_VTK_OUTPUT
 
 # If the MACRO_EXPANSION and EXPAND_ONLY_PREDEF tags are set to YES then this
 # tag can be used to specify a list of macro names that should be expanded. The
@@ -2331,7 +2310,7 @@ INCLUDED_BY_GRAPH      = YES
 # The default value is: NO.
 # This tag requires that the tag HAVE_DOT is set to YES.
 
-CALL_GRAPH             = NO
+CALL_GRAPH             = YES
 
 # If the CALLER_GRAPH tag is set to YES then doxygen will generate a caller
 # dependency graph for every global function or class method.
@@ -2343,7 +2322,7 @@ CALL_GRAPH             = NO
 # The default value is: NO.
 # This tag requires that the tag HAVE_DOT is set to YES.
 
-CALLER_GRAPH           = NO
+CALLER_GRAPH           = YES
 
 # If the GRAPHICAL_HIERARCHY tag is set to YES then doxygen will graphical
 # hierarchy of all classes instead of a textual one.
diff --git a/docs/api/ALL.rst b/docs/api/ALL.rst
new file mode 100644
index 0000000000000000000000000000000000000000..b91fc10ec25d21e7946952baa34f9d7525a1548a
--- /dev/null
+++ b/docs/api/ALL.rst
@@ -0,0 +1,8 @@
+.. _ALLClass:
+
+ALL class
+=========
+
+.. doxygenclass:: ALL::ALL
+
+.. vim: et sw=2 ts=2 tw=74 spell spelllang=en_us:
diff --git a/docs/api/ALL_Point.rst b/docs/api/ALL_Point.rst
new file mode 100644
index 0000000000000000000000000000000000000000..1ff5eaa6366e48e54170e22e0d26449b9228da1e
--- /dev/null
+++ b/docs/api/ALL_Point.rst
@@ -0,0 +1,8 @@
+.. _PointClass:
+
+Point class
+===========
+
+.. doxygenclass:: ALL::Point
+
+.. vim: et sw=2 ts=2 tw=74 spell spelllang=en_us:
diff --git a/docs/api/ALL_module.rst b/docs/api/ALL_module.rst
new file mode 100644
index 0000000000000000000000000000000000000000..6d889eba04631945da726cf9470c16ee0b46876e
--- /dev/null
+++ b/docs/api/ALL_module.rst
@@ -0,0 +1,87 @@
+.. _ALLModule:
+
+ALL Fortran module
+==================
+
+Due to the case insensitive nature of Fortran and the used toolchain
+(doxygen and sphinx), all variables and function names are lowercase on
+this page. We do, however, use only upper case for global constants, such
+as the load balancing methods (``ALL_STAGGERED`` etc.) and the function
+names are usually written similarly to MPI as ``ALL_get_work``. The object
+is of ``type(ALL_t)``.
+
+Methods
+-------
+Available methods are:
+
+.. doxygenvariable:: all_histogram
+.. doxygenvariable:: all_staggered
+.. doxygenvariable:: all_tensor
+.. doxygenvariable:: all_unstructured
+.. doxygenvariable:: all_voronoi
+
+
+Errors
+------
+The exceptions map to the following error constants:
+
+.. doxygenvariable:: all_error_filesystem
+.. doxygenvariable:: all_error_generic
+.. doxygenvariable:: all_error_internal
+.. doxygenvariable:: all_error_invalidargument
+.. doxygenvariable:: all_error_invalidcommtype
+.. doxygenvariable:: all_error_outofbounds
+.. doxygenvariable:: all_error_pointdimensionmissmatch
+
+And the error message is returned in a string of length
+``ALL_ERROR_LENGTH``.
+
+Functions
+---------
+These function all have a corresponding call in the object, with the
+``ALL_`` stripped from the function name. So with an object named
+``balancer`` the function ``ALL_get_work(balancer, work)`` is identical to
+``balancer%get_work(work)``. The following functions are available:
+
+.. doxygenfunction:: all_balance
+.. doxygenfunction:: all_finalize
+.. doxygenfunction:: all_setup
+
+Getters
+*******
+.. doxygenfunction:: all_get_dimension
+.. doxygenfunction:: all_get_gamma
+.. doxygenfunction:: all_get_length_of_work
+.. doxygenfunction:: all_get_neighbors
+.. doxygenfunction:: all_get_number_of_neighbors
+.. doxygenfunction:: all_get_number_of_vertices
+.. doxygenfunction:: all_get_prev_vertices
+.. doxygenfunction:: all_get_vertices
+.. doxygenfunction:: all_get_vertices_alloc
+.. doxygenfunction:: all_get_work
+.. doxygenfunction:: all_get_work_array
+
+Setters
+*******
+.. doxygenfunction:: all_set_gamma
+.. doxygenfunction:: all_set_method_data_histgram
+.. doxygenfunction:: all_set_min_domain_size
+.. doxygenfunction:: all_set_proc_grid_params
+.. doxygenfunction:: all_set_proc_tag
+.. doxygenfunction:: all_set_sys_size
+.. doxygenfunction:: all_set_vertices
+.. doxygenfunction:: all_set_work
+.. doxygenfunction:: all_set_work_multi
+
+Output
+******
+.. doxygenfunction:: all_print_vtk_outlines
+.. doxygenfunction:: all_print_vtk_vertices
+
+Error handling
+**************
+.. doxygenfunction:: all_error
+.. doxygenfunction:: all_error_description
+.. doxygenfunction:: all_reset_error
+
+.. vim: et sw=2 ts=2 tw=74 spell spelllang=en_us:
diff --git a/docs/api/CustomExceptions.rst b/docs/api/CustomExceptions.rst
new file mode 100644
index 0000000000000000000000000000000000000000..24307560f63d05048597b1851420f5162c3443d2
--- /dev/null
+++ b/docs/api/CustomExceptions.rst
@@ -0,0 +1,44 @@
+.. _CustomExceptions:
+
+Custom Exceptions
+=================
+
+The library may throw one of these exceptions, which are all based on
+``ALL::CustomException``.
+
+``CustomException``
+-------------------
+.. doxygenstruct:: ALL::CustomException
+  :outline:
+
+``FilesystemErrorException``
+****************************
+.. doxygenstruct:: ALL::FilesystemErrorException
+  :outline:
+
+``InternalErrorException``
+**************************
+.. doxygenstruct:: ALL::InternalErrorException
+  :outline:
+
+``InvalidArgumentException``
+****************************
+.. doxygenstruct:: ALL::InvalidArgumentException
+  :outline:
+
+``InvalidCommTypeException``
+****************************
+.. doxygenstruct:: ALL::InvalidCommTypeException
+  :outline:
+
+``OutOfBoundsException``
+************************
+.. doxygenstruct:: ALL::OutOfBoundsException
+  :outline:
+
+``PointDimensionMissmatchException``
+************************************
+.. doxygenstruct:: ALL::PointDimensionMissmatchException
+  :outline:
+
+.. vim: et sw=2 ts=2 tw=74 spell spelllang=en_us:
diff --git a/docs/api/index.rst b/docs/api/index.rst
new file mode 100644
index 0000000000000000000000000000000000000000..9459ed494336b1a3b73713794edadee38ac7d62c
--- /dev/null
+++ b/docs/api/index.rst
@@ -0,0 +1,15 @@
+Public API Reference
+====================
+
+.. toctree::
+  :hidden:
+
+  ALL.rst
+  CustomExceptions.rst
+  ALL_Point.rst
+  ALL_module.rst
+
+These pages should give a short overview of the public API
+
+
+.. vim: et sw=2 ts=2 tw=74 spell spelllang=en_us:
diff --git a/docs/classes/ALL.rst b/docs/classes/ALL.rst
deleted file mode 100644
index e28b5efb061a66e95153204f000e7a26190a0c12..0000000000000000000000000000000000000000
--- a/docs/classes/ALL.rst
+++ /dev/null
@@ -1,9 +0,0 @@
-===
-ALL
-===
-
-.. doxygenclass:: ALL
-   :members:
-   :protected-members:
-   :private-members:
-   :undoc-members:
\ No newline at end of file
diff --git a/docs/classes/ALL_Functions.rst b/docs/classes/ALL_Functions.rst
deleted file mode 100644
index 2be459b8d3f6989083b707c845aa15edb21f4867..0000000000000000000000000000000000000000
--- a/docs/classes/ALL_Functions.rst
+++ /dev/null
@@ -1,7 +0,0 @@
-======================
-ALL_Functions overview
-======================
-
-.. doxygennamespace:: ALL_Functions
-   :project: ALL
-
diff --git a/docs/classes/ALL_Histogram.rst b/docs/classes/ALL_Histogram.rst
deleted file mode 100644
index 0c5dcbbcfa7761a62a9dc76af2941a3ca3122d55..0000000000000000000000000000000000000000
--- a/docs/classes/ALL_Histogram.rst
+++ /dev/null
@@ -1,10 +0,0 @@
-=============
-ALL_Histogram
-=============
-
-.. doxygenclass:: ALL_Histogram_LB
-   :members:
-   :protected-members:
-   :private-members:
-   :undoc-members:
-
diff --git a/docs/classes/ALL_LB.rst b/docs/classes/ALL_LB.rst
deleted file mode 100644
index 161213f7bd201651a6fdc19b3492c9efd9fd2db6..0000000000000000000000000000000000000000
--- a/docs/classes/ALL_LB.rst
+++ /dev/null
@@ -1,9 +0,0 @@
-=============
-ALL_LB
-=============
-
-.. doxygenclass:: ALL_LB
-   :members:
-   :protected-members:
-   :private-members:
-   :undoc-members:
\ No newline at end of file
diff --git a/docs/classes/ALL_Point.rst b/docs/classes/ALL_Point.rst
deleted file mode 100644
index 811bc90c1224f239da5606e2a9ff88a0c25b42c6..0000000000000000000000000000000000000000
--- a/docs/classes/ALL_Point.rst
+++ /dev/null
@@ -1,9 +0,0 @@
-=============
-ALL_Point
-=============
-
-.. doxygenclass:: ALL_Point
-   :members:
-   :protected-members:
-   :private-members:
-   :undoc-members:
\ No newline at end of file
diff --git a/docs/classes/ALL_Staggered.rst b/docs/classes/ALL_Staggered.rst
deleted file mode 100644
index 4ee2c699b606253b9d379180a5bf82b904592d13..0000000000000000000000000000000000000000
--- a/docs/classes/ALL_Staggered.rst
+++ /dev/null
@@ -1,10 +0,0 @@
-=============
-ALL_Staggered
-=============
-
-.. doxygenclass:: ALL_Staggered_LB
-   :members:
-   :protected-members:
-   :private-members:
-   :undoc-members:
-
diff --git a/docs/classes/ALL_Tensor.rst b/docs/classes/ALL_Tensor.rst
deleted file mode 100644
index 615fd497cb737a95fda76dcb9689ffad2d6ea177..0000000000000000000000000000000000000000
--- a/docs/classes/ALL_Tensor.rst
+++ /dev/null
@@ -1,9 +0,0 @@
-==========
-ALL_Tensor
-==========
-
-.. doxygenclass:: ALL_Tensor_LB
-   :members:
-   :private-members:
-   :protected-members:
-   :undoc-members:
\ No newline at end of file
diff --git a/docs/conf.py b/docs/conf.py.in
similarity index 77%
rename from docs/conf.py
rename to docs/conf.py.in
index 1a8868ae854b8a31dd3896b42a8dd104550b3f38..b537c02233d548f85b92e884206ace2144a89be0 100644
--- a/docs/conf.py
+++ b/docs/conf.py.in
@@ -13,13 +13,16 @@
 # import os
 # import sys
 # sys.path.insert(0, os.path.abspath('.'))
+from recommonmark.parser import CommonMarkParser
 
 
 # -- Project information -----------------------------------------------------
 
-project = 'ALL'
+project = '@PROJECT_NAME@'
 copyright = '2020, Rene Halver, Forschungszentrum Juelich GmbH'
 author = 'Rene Halver'
+version = '@PROJECT_VERSION_MAJOR@.@PROJECT_VERSION_MINOR@'
+release = '@PROJECT_VERSION_MAJOR@.@PROJECT_VERSION_MINOR@.@PROJECT_VERSION_PATCH@'
 
 
 # -- General configuration ---------------------------------------------------
@@ -32,6 +35,9 @@ extensions = [ "breathe" ]
 # Add any paths that contain templates here, relative to this directory.
 templates_path = ['_templates']
 
+#source_parseres = { '.md': CommonMarkParser }
+#source_suffix = ['.rst', '.md']
+
 # List of patterns, relative to source directory, that match files and
 # directories to ignore when looking for source files.
 # This pattern also affects html_static_path and html_extra_path.
@@ -50,5 +56,12 @@ html_theme = 'sphinx_rtd_theme'
 # so a file named "default.css" will overwrite the builtin "default.css".
 html_static_path = ['_static']
 
+html_theme_options = {
+    'sticky_navigation': True,
+    'style_external_links': True,
+}
+
 # Breath Config
-breathe_default_project = "ALL"
\ No newline at end of file
+breathe_projects = {"ALL":"@SPHINX_BREATHE_PROJECT_PATH@"}
+breathe_default_project = "ALL"
+breathe_default_members = ('members', 'undoc-members')
diff --git a/docs/full/complete.rst b/docs/full/complete.rst
deleted file mode 100644
index 0155ae66a342c030ae1609ac485940a1c7182d99..0000000000000000000000000000000000000000
--- a/docs/full/complete.rst
+++ /dev/null
@@ -1,7 +0,0 @@
-.. .. _classes/FullDocumentation
-
-.. ==================
-.. Full Documentation
-.. ==================
-
-.. .. doxygenindex:: 
\ No newline at end of file
diff --git a/docs/index.rst b/docs/index.rst
index 55c09b18cf0061cddef79202748ee12138180757..63436a5a5e3fdd5333bd90203228af252b5c829c 100644
--- a/docs/index.rst
+++ b/docs/index.rst
@@ -3,22 +3,17 @@
    You can adapt this file completely to your liking, but it should at least
    contain the root `toctree` directive.
 
-Welcome to ALL auto generated documentation!
-============================================
+Welcome to the documentation of ALL
+===================================
 
 .. toctree::
-   :maxdepth: 2
-   :caption: Contents:
-   :glob:
+  :maxdepth: 2
+  :caption: Contents:
+  :includehidden:
 
-   classes/*
+  api/index.rst
 
-.. .. toctree::
-..    :maxdepth: 2
-..    :caption: Full documentation:
-..    :glob:
-      
-..    full/*
+todo(s.schulz): Link to doxygen documentation.
 
 Indices and tables
 ==================
@@ -28,3 +23,4 @@ Indices and tables
 
 .. * :ref:`modindex`
 
+.. vim: et sw=2 ts=2 tw=74 spell spelllang=en_us:
diff --git a/example/ALL_Staggered.cpp b/example/ALL_Staggered.cpp
index 607fdeb72af8f1b79683b2d7dfc2862db4f59433..328fb23916382ef59c26b2ea43bb56c2514289bc 100644
--- a/example/ALL_Staggered.cpp
+++ b/example/ALL_Staggered.cpp
@@ -224,11 +224,17 @@ int main(int argc, char** argv)
 			fflush(stdout);
 		}
 #ifdef ALL_VTK_OUTPUT_EXAMPLE
-		jall->printVTKoutlines(CurrentStep);
+		try {
+			jall->printVTKoutlines(CurrentStep);
+		} catch (ALL::FilesystemErrorException &e) {
+			std::cout << e.what() << std::endl;
+			// Maybe also treat this error in some way, e.g. whether the simulation
+			// should abort if no output could be written or not.
+		}
 #endif
 		jall->balance();
 
-		std::vector<ALL::Point<double>> NewVertices = jall->getResultVertices();
+		std::vector<ALL::Point<double>> NewVertices = jall->getVertices();
 		//MPI_RUN_ORDER(MPI_COMM_WORLD, MyRank, MaximumRank, (print_width(MyRank, NewVertices.at(1)[2]-NewVertices.at(0)[2], NewVertices.at(0)[2], NewVertices.at(1)[2])));
 		MPI_RUN_ORDER_DEF((print_testing_output(MyRank, NewVertices, CurrentStep+1)));
 		// Maybe print our new domain? Or not..
@@ -239,7 +245,11 @@ int main(int argc, char** argv)
 		MPI_Barrier(MPI_COMM_WORLD);
 	}
 #ifdef ALL_VTK_OUTPUT_EXAMPLE
-	jall->printVTKoutlines(CurrentStep);
+	try {
+		jall->printVTKoutlines(CurrentStep);
+	} catch (ALL::FilesystemErrorException &e) {
+		std::cout << e.what() << std::endl;
+	}
 #endif
 
 	delete jall;
diff --git a/example/ALL_Staggered_f.F90 b/example/ALL_Staggered_f.F90
index 249779c4aea541c3c95954b07d623fef76d6c454..88a9576085a3f40cc2c9767d26f43baff1405056 100644
--- a/example/ALL_Staggered_f.F90
+++ b/example/ALL_Staggered_f.F90
@@ -33,14 +33,14 @@ program ALL_Staggered_f
     use, intrinsic :: iso_fortran_env, only: stdin=>input_unit&
         , stdout=>output_unit&
         , stderr=>error_unit&
-        , file_storage_size&
-        ,int32,int64,real32,real64
+        , file_storage_size
 #ifndef ALL_USE_F08
     use mpi
 #else
     use mpi_f08
 #endif
     implicit none
+    integer, parameter :: real64 = selected_real_kind(15)
 
     call main()
 contains
@@ -139,6 +139,9 @@ contains
         integer :: my_rank, maximum_rank
         integer :: i,j
         type(ALL_t) :: jall
+#ifdef ALL_VTK_OUTPUT
+        character (len=ALL_ERROR_LENGTH) :: error_msg
+#endif
 
         call MPI_Init(error)
 
@@ -207,18 +210,32 @@ contains
                 flush(stdout)
             endif
 #ifdef ALL_VTK_OUTPUT
+            call ALL_reset_error()
             call jall%print_vtk_outlines(current_step)
+            if(ALL_error() /= 0) then
+                error_msg = ALL_error_description()
+                write(stdout, '(a)') error_msg
+                ! Maybe also abort if the output is necesssary or handle this
+                ! some other way.
+            endif
 #endif
             call jall%balance()
 
-            call jall%get_result_vertices(new_vertices)
+            call jall%get_vertices(new_vertices)
 
             call print_testing_output(my_rank, maximum_rank, new_vertices, current_step)
 
             call MPI_Barrier(MPI_COMM_WORLD, error)
         enddo
 #ifdef ALL_VTK_OUTPUT
+        call ALL_reset_error()
         call jall%print_vtk_outlines(current_step)
+        if(ALL_error() /= 0) then
+            error_msg = ALL_error_description()
+            write(stdout, '(a)') error_msg
+            ! Maybe also abort if the output is necesssary or handle this
+            ! some other way.
+        endif
 #endif
 
         call jall%finalize()
diff --git a/example/ALL_Voronoi.cpp b/example/ALL_Voronoi.cpp
index e892edbac5ab5ad4154b438248b8b7be8c9fe8f7..5c172c9b8d9de323232618ad92874511966d5469 100644
--- a/example/ALL_Voronoi.cpp
+++ b/example/ALL_Voronoi.cpp
@@ -196,7 +196,7 @@ int main(int argc, char** argv)
 #endif
 		jall->balance();
 
-		std::vector<ALL::Point<double>> NewVertices = jall->getResultVertices();
+		std::vector<ALL::Point<double>> NewVertices = jall->getVertices();
 		//MPI_RUN_ORDER(MPI_COMM_WORLD, MyRank, MaximumRank, (print_width(MyRank, NewVertices.at(1)[2]-NewVertices.at(0)[2], NewVertices.at(0)[2], NewVertices.at(1)[2])));
 		MPI_RUN_ORDER_DEF((print_testing_output(MyRank, NewVertices, CurrentStep+1)));
 		// Maybe print our new domain? Or not..
diff --git a/example/ALL_test.cpp b/example/ALL_test.cpp
index a1d895d4c54ea37e94fabc5ed9c6f392ace63f38..5827b7df32e17cd763300179e6f0ea636870d77f 100644
--- a/example/ALL_test.cpp
+++ b/example/ALL_test.cpp
@@ -222,7 +222,7 @@ void print_points(std::vector<ALL::Point<double>> plist, int step,
 #ifdef ALL_VORONOI_ACTIVE
       method == ALL::LB_t::VORONOI ||
 #endif
-      method == ALL::LB_t::UNSTRUCTURED) {
+      method == ALL::LB_t::FORCEBASED) {
     if (!vtk_init) {
       controller = vtkMPIController::New();
       controller->Initialize();
@@ -423,8 +423,8 @@ int main(int argc, char **argv) {
       case ALL::LB_t::STAGGERED:
         std::cout << "chosen method: STAGGERED" << std::endl;
         break;
-      case ALL::LB_t::UNSTRUCTURED:
-        std::cout << "chosen method: UNSTRUCTURED" << std::endl;
+      case ALL::LB_t::FORCEBASED:
+        std::cout << "chosen method: FORCEBASED" << std::endl;
         break;
 #ifdef ALL_VORONOI_ACTIVE
       case ALL::LB_t::VORONOI:
@@ -463,7 +463,7 @@ int main(int argc, char **argv) {
     case ALL::LB_t::STAGGERED:
       nvertices = 2;
       break;
-    case ALL::LB_t::UNSTRUCTURED:
+    case ALL::LB_t::FORCEBASED:
       nvertices = 8;
       break;
 #ifdef ALL_VORONOI_ACTIVE
@@ -496,7 +496,7 @@ int main(int argc, char **argv) {
       vertices.at(0) = lp;
       vertices.at(1) = up;
       break;
-    case ALL::LB_t::UNSTRUCTURED:
+    case ALL::LB_t::FORCEBASED:
       for (int i = 0; i < nvertices; ++i)
         vertices.at(0) = lp;
       for (int z = 0; z < 2; ++z)
@@ -1084,7 +1084,7 @@ int main(int argc, char **argv) {
       if (localRank == 0)
         std::cout << "Updating vertices..." << std::endl;
 #endif
-      new_vertices = lb_obj.getResultVertices();
+      new_vertices = lb_obj.getVertices();
       old_vertices = vertices;
       vertices = new_vertices;
 
@@ -1212,9 +1212,8 @@ int main(int argc, char **argv) {
         int n_transfer[2];
         int n_recv[2 * MAX_NEIG];
         int offset_neig[2];
-        std::vector<int> neighbors;
+        std::vector<int> neighbors = lb_obj.getNeighbors();
         int *nNeighbors;
-        lb_obj.getNeighbors(neighbors);
         nNeighbors = neighbors.data();
 
         offset_neig[0] = 0;
@@ -1424,7 +1423,7 @@ int main(int argc, char **argv) {
             offset_neig[1] = offset_neig[0] + nNeighbors[2 * (i + 1)];
         }
       } break;
-      case ALL::LB_t::UNSTRUCTURED: {
+      case ALL::LB_t::FORCEBASED: {
 
         // array of all vertices of all neighbors (and self)
         double comm_vertices[27 * 8 * 3];
@@ -1523,7 +1522,7 @@ int main(int argc, char **argv) {
 #ifdef ALL_VTK_FORCE_SORT
         // creating an unstructured grid for local domain and neighbors
         auto vtkpoints = vtkSmartPointer<vtkPoints>::New();
-        auto unstructuredGrid = vtkSmartPointer<vtkUnstructuredGrid>::New();
+        auto unstructuredGrid = vtkSmartPointer<vtkForceBasedGrid>::New();
         for (int i = 0; i < 27; ++i) {
           for (int v = 0; v < 8; ++v) {
             vtkpoints->InsertNextPoint(comm_vertices[i * 24 + v * 3],
@@ -1586,7 +1585,7 @@ int main(int argc, char **argv) {
         /*
            if (localRank == 26)
            {
-           auto writer = vtkSmartPointer<vtkXMLUnstructuredGridWriter>::New();
+           auto writer = vtkSmartPointer<vtkXMLForceBasedGridWriter>::New();
            writer->SetInputData(unstructuredGrid);
            writer->SetFileName("test.vtu");
            writer->SetDataModeToAscii();
@@ -1823,7 +1822,7 @@ int main(int argc, char **argv) {
 
 #else
         if (localRank == 0)
-          std::cout << "Currently no UNSTRUCTURED test without VTK!"
+          std::cout << "Currently no FORCEBASED test without VTK!"
                     << std::endl;
         MPI_Abort(MPI_COMM_WORLD, -1);
 #endif
@@ -1831,11 +1830,9 @@ int main(int argc, char **argv) {
 #ifdef ALL_VORONOI_ACTIVE
       case ALL::LB_t::VORONOI: {
         // get neighbor information
-        int nNeighbors = lb_obj.getNNeighbors();
-        std::vector<double> neighbor_vertices;
-        std::vector<int> neighbors;
-        lb_obj.getNeighborVertices(neighbor_vertices);
-        lb_obj.getNeighbors(neighbors);
+        std::vector<double> neighbor_vertices = lb_obj.getNeighborVertices();
+        std::vector<int> neighbors = lb_obj.getNeighbors();
+        int nNeighbors = neighbors.size();
 
         // compute voronoi cells
 
@@ -2185,11 +2182,11 @@ int main(int argc, char **argv) {
 #ifdef ALL_VTK_OUTPUT
           // if (localRank == 0)
           //  std::cout << "creating vtk outlines output" << std::endl;
-          if (chosen_method != ALL::LB_t::UNSTRUCTURED)
+          if (chosen_method != ALL::LB_t::FORCEBASED)
             lb_obj.printVTKoutlines(output_step);
           // if (localRank == 0)
           //  std::cout << "creating vtk vertices output" << std::endl;
-          if (chosen_method == ALL::LB_t::UNSTRUCTURED)
+          if (chosen_method == ALL::LB_t::FORCEBASED)
             lb_obj.printVTKvertices(output_step);
           // if (localRank == 0)
           //  std::cout << "creating points output" << std::endl;
@@ -2278,8 +2275,7 @@ int main(int argc, char **argv) {
 
           if (chosen_method == ALL::LB_t::HISTOGRAM) {
             int *nNeighbors;
-            std::vector<int> neighbors;
-            lb_obj.getNeighbors(neighbors);
+            std::vector<int> neighbors = lb_obj.getNeighbors();
             nNeighbors = neighbors.data();
             int n_neig = 0;
             for (int j = 0; j < 3; ++j) {
@@ -2324,7 +2320,7 @@ int main(int argc, char **argv) {
         if (chosen_method != ALL::LB_t::VORONOI) {
 #endif
 #ifdef ALL_VTK_OUTPUT
-          if (chosen_method != ALL::LB_t::UNSTRUCTURED) {
+          if (chosen_method != ALL::LB_t::FORCEBASED) {
             lb_obj.printVTKoutlines(output_step);
             double width[3];
             double volume = 1.0;
@@ -2350,7 +2346,7 @@ int main(int argc, char **argv) {
               MPI_Barrier(cart_comm);
             }
           }
-          if (chosen_method == ALL::LB_t::UNSTRUCTURED)
+          if (chosen_method == ALL::LB_t::FORCEBASED)
             lb_obj.printVTKvertices(output_step);
 #endif
           output_step++;
diff --git a/example/ALL_test_f.f90 b/example/ALL_test_f.f90
index 4dc7bd60153b4f1527f0a40e27696016916a5fd4..b646510a9443b8a0d8471b6eadb463340ad839c5 100644
--- a/example/ALL_test_f.f90
+++ b/example/ALL_test_f.f90
@@ -95,9 +95,9 @@ PROGRAM ALL_test_f
     CALL ALL_setup(obj)
     CALL ALL_balance(obj)
 
-    CALL ALL_get_new_n_vertices(obj,n_vertices)
+    CALL ALL_get_number_of_vertices(obj,n_vertices)
     ALLOCATE(new_vertices(3,n_vertices))
-    CALL ALL_get_result_vertices(obj,new_vertices)
+    CALL ALL_get_vertices(obj,new_vertices)
 
     DO i = 0, n_ranks-1
         IF (i == rank) THEN
diff --git a/example/ALL_test_f_obj.F90 b/example/ALL_test_f_obj.F90
index 95828b688d58fac89d200a08025874b280313475..8488c8d0b10f94173119991be6ff95965372c938 100644
--- a/example/ALL_test_f_obj.F90
+++ b/example/ALL_test_f_obj.F90
@@ -95,8 +95,8 @@ program ALL_test_f
     call lb%balance()
 
     ! If not allocatable as target, must use
-    ! lb%get_new_n_vertices and lb%get_result_vertices instead!
-    call lb%get_result_vertices_alloc(new_vertices)
+    ! lb%get_number_of_vertices and lb%get_vertices instead!
+    call lb%get_vertices_alloc(new_vertices)
 
     do i = 0, n_ranks-1
         if (i == rank) then
diff --git a/example/jube/ALL_benchmark.xml b/example/jube/ALL_benchmark.xml
index 7464a6a11b4b865e61b215f4e5c895c488a5848d..b8296417a0e5e642c29173dca8c2123371b7dc69 100644
--- a/example/jube/ALL_benchmark.xml
+++ b/example/jube/ALL_benchmark.xml
@@ -12,7 +12,7 @@
             <parameter name="weight" type="int">0,1</parameter>
             <parameter name="gamma" type="float">4.0,8.0,16.0,32.0</parameter><-->
             <parameter name="method" type="int">0,1,4</parameter>
-            <parameter name="method_name" mode="python">["STAGGERED","TENSOR","UNSTRUCTURED","VORONOI","HISTOGRAM"][${method}]</parameter>
+            <parameter name="method_name" mode="python">["STAGGERED","TENSOR","FORCEBASED","VORONOI","HISTOGRAM"][${method}]</parameter>
             <parameter name="max_iter" mode="python" type="int">[500,500,-1,-1,3][${method}]</parameter>
             <parameter name="weight" type="int">0,1</parameter>
             <parameter name="gamma" type="float">4.0,8.0,16.0</parameter>
diff --git a/include/ALL.hpp b/include/ALL.hpp
index db15205bb5093cd7fba931ee75681ba6a91002c2..2308e0a689838e7220b9090d5955e5cec15fcdeb 100644
--- a/include/ALL.hpp
+++ b/include/ALL.hpp
@@ -40,7 +40,7 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #endif
 #include "ALL_Histogram.hpp"
 #include "ALL_Staggered.hpp"
-#include "ALL_Unstructured.hpp"
+#include "ALL_ForceBased.hpp"
 #include <algorithm>
 #include <iomanip>
 #include <memory>
@@ -79,7 +79,7 @@ enum LB_t : int {
   /// tensor based load balancing
   TENSOR = 1,
   /// unstructured-mesh load balancing
-  UNSTRUCTURED = 2,
+  FORCEBASED = 2,
 #ifdef ALL_VORONOI_ACTIVE
   /// voronoi cell based load balancing
   VORONOI = 3,
@@ -142,8 +142,8 @@ public:
     case LB_t::STAGGERED:
       balancer.reset(new Staggered_LB<T, W>(d, (W)0, g));
       break;
-    case LB_t::UNSTRUCTURED:
-      balancer.reset(new Unstructured_LB<T, W>(d, (W)0, g));
+    case LB_t::FORCEBASED:
+      balancer.reset(new ForceBased_LB<T, W>(d, (W)0, g));
       break;
 #ifdef ALL_VORONOI_ACTIVE
     case LB_t::VORONOI:
@@ -283,12 +283,7 @@ public:
   /// method to call the setup of the chosen balancing method (not all methods
   /// require a setup)
   void setup() {
-    try {
-      balancer->setup();
-    } catch (CustomException &e) {
-      std::cout << e.what() << std::endl;
-      MPI_Abort(MPI_COMM_WORLD, -1);
-    }
+    balancer->setup();
   }
 
   /// method the trigger the balancing step, that updates the vertices according
@@ -297,107 +292,102 @@ public:
   /// for some recursive calls of the routine by some methods
   /// @result std::vector<ALL::Point<T>> containing the shifted set of vertices
   std::vector<Point<T>> &balance() {
-    try {
-      nVertices = balancer->getNVertices();
-      switch (method) {
-      case LB_t::TENSOR:
-        balancer->balance(loadbalancing_step);
-        break;
-      case LB_t::STAGGERED:
-
-        /* ToDo: Check if repetition is useful at all and
-         *       change it to be included for all methods,
-         *       not only STAGGERED */
-        /*
-        // estimation to improve speed of convergence
-        // changing vertices during estimation
-        std::vector<Point<T>> tmp_vertices = balancer->getVertices();
-        // saved original vertices
-        std::vector<Point<T>> old_vertices = balancer->getVertices();
-        // old temporary vertices
-        std::vector<Point<T>> old_tmp_vertices = balancer->getVertices();
-        // result temporary vertices
-        std::vector<Point<T>> result_vertices = balancer->getVertices();
-        // temporary work
-        W tmp_work = balancer->getWork().at(0);
-        // old temporary work
-        W old_tmp_work = balancer->getWork().at(0);
-
-        W max_work;
-        W sum_work;
-        double d_max;
-        int n_ranks;
-
-        // compute work density on local domain
-        double V = 1.0;
-        for (int i = 0; i < balancer->getDimension(); ++i)
-            V *= ( outline.at(1)[i] - outline.at(0)[i] );
-        double rho = workArray.at(0) / V;
-
-        // collect maximum work in system
-        MPI_Allreduce(workArray.data(), &max_work, 1, MPIDataTypeW, MPI_MAX,
-        communicator); MPI_Allreduce(workArray.data(), &sum_work, 1,
-        MPIDataTypeW, MPI_SUM, communicator);
-        MPI_Comm_size(communicator,&n_ranks);
-        d_max = (double)(max_work) * (double)n_ranks / (double)sum_work - 1.0;
-
-
-        // internal needs to be readded to argument list const bool internal = false
-        for (int i = 0; i < estimation_limit && d_max > 0.1 && !internal; ++i)
-        {
-            balancer->setWork(tmp_work);
-            balancer->setup();
-            balancer->balance(true);
-            bool sane = true;
-            // check if the estimated boundaries are not too deformed
-            for (int j = 0; j < balancer->getDimension(); ++j)
-                sane = sane && (result_vertices.at(0)[j] <=
-        old_vertices.at(1)[j]);
-            MPI_Allreduce(MPI_IN_PLACE,&sane,1,MPI_CXX_BOOL,MPI_LAND,communicator);
-            if (sane)
-            {
-                old_tmp_vertices = tmp_vertices;
-                tmp_vertices = result_vertices;
-                V = 1.0;
-                for (int i = 0; i < balancer->getDimension(); ++i)
-                    V *= ( tmp_vertices.at(1)[i] - tmp_vertices.at(0)[i] );
-                old_tmp_work = tmp_work;
-                tmp_work = rho * V;
-            }
-            else if (!sane || i == estimation_limit - 1)
-            {
-                balancer->getVertices() = old_tmp_vertices;
-                workArray->at(0) = old_tmp_work;
-                calculate_outline();
-                i = estimation_limit;
-            }
-        }
+    nVertices = balancer->getNVertices();
+    switch (method) {
+    case LB_t::TENSOR:
+      balancer->balance(loadbalancing_step);
+      break;
+    case LB_t::STAGGERED:
 
-        ((Staggered_LB<T,W>*)balancer.get())->setVertices(outline->data());
-        */
-        balancer->balance(loadbalancing_step);
-        break;
-      case LB_t::UNSTRUCTURED:
-        balancer->balance(loadbalancing_step);
-        break;
+      /* ToDo: Check if repetition is useful at all and
+       *       change it to be included for all methods,
+       *       not only STAGGERED */
+      /*
+      // estimation to improve speed of convergence
+      // changing vertices during estimation
+      std::vector<Point<T>> tmp_vertices = balancer->getVertices();
+      // saved original vertices
+      std::vector<Point<T>> old_vertices = balancer->getVertices();
+      // old temporary vertices
+      std::vector<Point<T>> old_tmp_vertices = balancer->getVertices();
+      // result temporary vertices
+      std::vector<Point<T>> result_vertices = balancer->getVertices();
+      // temporary work
+      W tmp_work = balancer->getWork().at(0);
+      // old temporary work
+      W old_tmp_work = balancer->getWork().at(0);
+
+      W max_work;
+      W sum_work;
+      double d_max;
+      int n_ranks;
+
+      // compute work density on local domain
+      double V = 1.0;
+      for (int i = 0; i < balancer->getDimension(); ++i)
+          V *= ( outline.at(1)[i] - outline.at(0)[i] );
+      double rho = workArray.at(0) / V;
+
+      // collect maximum work in system
+      MPI_Allreduce(workArray.data(), &max_work, 1, MPIDataTypeW, MPI_MAX,
+      communicator); MPI_Allreduce(workArray.data(), &sum_work, 1,
+      MPIDataTypeW, MPI_SUM, communicator);
+      MPI_Comm_size(communicator,&n_ranks);
+      d_max = (double)(max_work) * (double)n_ranks / (double)sum_work - 1.0;
+
+
+      // internal needs to be readded to argument list const bool internal = false
+      for (int i = 0; i < estimation_limit && d_max > 0.1 && !internal; ++i)
+      {
+          balancer->setWork(tmp_work);
+          balancer->setup();
+          balancer->balance(true);
+          bool sane = true;
+          // check if the estimated boundaries are not too deformed
+          for (int j = 0; j < balancer->getDimension(); ++j)
+              sane = sane && (result_vertices.at(0)[j] <=
+      old_vertices.at(1)[j]);
+          MPI_Allreduce(MPI_IN_PLACE,&sane,1,MPI_CXX_BOOL,MPI_LAND,communicator);
+          if (sane)
+          {
+              old_tmp_vertices = tmp_vertices;
+              tmp_vertices = result_vertices;
+              V = 1.0;
+              for (int i = 0; i < balancer->getDimension(); ++i)
+                  V *= ( tmp_vertices.at(1)[i] - tmp_vertices.at(0)[i] );
+              old_tmp_work = tmp_work;
+              tmp_work = rho * V;
+          }
+          else if (!sane || i == estimation_limit - 1)
+          {
+              balancer->getVertices() = old_tmp_vertices;
+              workArray->at(0) = old_tmp_work;
+              calculate_outline();
+              i = estimation_limit;
+          }
+      }
+
+      ((Staggered_LB<T,W>*)balancer.get())->setVertices(outline->data());
+      */
+      balancer->balance(loadbalancing_step);
+      break;
+    case LB_t::FORCEBASED:
+      balancer->balance(loadbalancing_step);
+      break;
 #ifdef ALL_VORONOI_ACTIVE
-      case LB_t::VORONOI:
-        balancer->balance(loadbalancing_step);
-        break;
+    case LB_t::VORONOI:
+      balancer->balance(loadbalancing_step);
+      break;
 #endif
-      case LB_t::HISTOGRAM:
-        balancer->balance(loadbalancing_step);
-        break;
-      default:
-        throw InvalidArgumentException(
-            __FILE__, __func__, __LINE__,
-            "Unknown type of loadbalancing passed.");
-      }
-      loadbalancing_step++;
-    } catch (CustomException &e) {
-      std::cout << e.what() << std::endl;
-      MPI_Abort(MPI_COMM_WORLD, -1);
+    case LB_t::HISTOGRAM:
+      balancer->balance(loadbalancing_step);
+      break;
+    default:
+      throw InvalidArgumentException(
+          __FILE__, __func__, __LINE__,
+          "Unknown type of loadbalancing passed.");
     }
+    loadbalancing_step++;
     return balancer->getVertices();
   }
 
@@ -416,7 +406,7 @@ public:
   /// performed
   /// @result std::vector<ALL::Point<T>> containing the resulting vertices after
   /// the balancing step
-  std::vector<Point<T>> &getResultVertices() {
+  std::vector<Point<T>> &getVertices() {
     return balancer->getVertices();
   };
 
@@ -431,58 +421,22 @@ public:
   void getWork(std::vector<W> &result) { result = balancer->getWork(); }
 
   /// method to get the work provided to the method
-  /// @param[out] result to store the work to if only a scalar was used, if an
-  /// array was used to provide work only the first value will be returned
-  void getWork(W &result) { result = balancer->getWork().at(0); }
-
-  /// method to get the number of neigbors the local domain has
-  /// @result int the number of neighbors the local domain has in total
-  int getNNeighbors() {
-    std::vector<int> neig;
-    try {
-      balancer->getNeighbors(neig);
-    } catch (CustomException &e) {
-      std::cout << e.what() << std::endl;
-      MPI_Abort(MPI_COMM_WORLD, -1);
-    }
-    return neig.size();
-  }
+  /// @result scalar work or first value of vector work
+  W getWork() { return balancer->getWork().at(0); }
 
   /// method to provide a list of the ranks of the neighbors the local domain
   /// has in all directions
-  /// @param[out] neig a reference to a std::vector<int> object, which the
-  /// neighbors will be stored in
-  void getNeighbors(std::vector<int> &neig) {
-    try {
-      balancer->getNeighbors(neig);
-    } catch (CustomException &e) {
-      std::cout << e.what() << std::endl;
-      MPI_Abort(MPI_COMM_WORLD, -1);
-    }
+  /// @result vector if neighboring ranks
+  std::vector<int> &getNeighbors() {
+    return balancer->getNeighbors();
   }
 
   /// method to provide a list of neighboring vertices, e.g. required for
   /// VORONOI
-  /// @param[out] nv a reference to a vector object, which the
+  /// @result vector of neighboring vertices
   /// neighboring vertices are stored in
-  void getNeighborVertices(std::vector<T> &nv) {
-    switch (method) {
-    case LB_t::TENSOR:
-      break;
-    case LB_t::STAGGERED:
-      break;
-    case LB_t::UNSTRUCTURED:
-      break;
-#ifdef ALL_VORONOI_ACTIVE
-    case LB_t::VORONOI:
-      ((Voronoi_LB<T, W> *)balancer.get())->getNeighborVertices(nv);
-      break;
-#endif
-    case LB_t::HISTOGRAM:
-      break;
-    default:
-      break;
-    }
+  std::vector<T> &getNeighborVertices() {
+      return balancer->getNeighborVertices();
   }
 
   /// method to set the size of the system, e.g. required for the HISTOGRAM
@@ -498,6 +452,10 @@ public:
   /// balancing methods
   /// @param[in] data pointer to a struct or other data object in the format the
   /// methods requires
+  ///
+  /// For the histogram method the number of bins per dimensions can be given
+  /// as a C array of `int`s. The length of the array must be the number of
+  /// dimensions given to the load balancer. BE CAREFUL this is not enforced!
   void setMethodData(const void *data) {
     balancer.get()->setAdditionalData(data);
   }
@@ -523,12 +481,7 @@ public:
   /// next-near neighbors instead of only with next neighbors
   /// @param[in] minSize the minimum size of a domain in each dimension
   void setMinDomainSize(const std::vector<T> &minSize) {
-    try {
-      (balancer.get())->setMinDomainSize(minSize);
-    } catch (CustomException &e) {
-      std::cout << e.what() << std::endl;
-      MPI_Abort(MPI_COMM_WORLD, -1);
-    }
+    (balancer.get())->setMinDomainSize(minSize);
   }
 
 #ifdef ALL_VTK_OUTPUT
@@ -654,9 +607,7 @@ public:
     unstructuredGrid->GetCellData()->AddArray(rnk);
     unstructuredGrid->GetCellData()->AddArray(tag);
 
-    if(!createDirectory("vtk_outline")) throw FilesystemErrorException(
-        __FILE__, __func__, __LINE__,
-        "Could not create output directory.");
+    createDirectory("vtk_outline");
     std::ostringstream ss_local;
     ss_local << "vtk_outline/ALL_vtk_outline_" << std::setw(7)
              << std::setfill('0') << step << "_" << local_rank << ".vtu";
@@ -692,134 +643,126 @@ public:
   /// @param[in] step the number of the loadbalancing step used for numbering
   /// the output files
   void printVTKvertices(const int step) {
-    try {
-      int n_ranks;
-      int local_rank;
+    int n_ranks;
+    int local_rank;
 
-      MPI_Comm_rank(communicator, &local_rank);
-      MPI_Comm_size(communicator, &n_ranks);
+    MPI_Comm_rank(communicator, &local_rank);
+    MPI_Comm_size(communicator, &n_ranks);
 
-      // local vertices
-      // (vertices + work)
-      T local_vertices[nVertices * balancer->getDimension() + 1];
+    // local vertices
+    // (vertices + work)
+    T local_vertices[nVertices * balancer->getDimension() + 1];
 
-      for (int v = 0; v < nVertices; ++v) {
-        for (int d = 0; d < balancer->getDimension(); ++d) {
-          local_vertices[v * balancer->getDimension() + d] =
-              balancer->getVertices().at(v)[d];
-        }
+    for (int v = 0; v < nVertices; ++v) {
+      for (int d = 0; d < balancer->getDimension(); ++d) {
+        local_vertices[v * balancer->getDimension() + d] =
+            balancer->getVertices().at(v)[d];
       }
-      local_vertices[nVertices * balancer->getDimension()] =
-          (T)balancer->getWork().at(0);
+    }
+    local_vertices[nVertices * balancer->getDimension()] =
+        (T)balancer->getWork().at(0);
 
-      T *global_vertices;
-      if (local_rank == 0) {
-        global_vertices =
-            new T[n_ranks * (nVertices * balancer->getDimension() + 1)];
-      }
+    T *global_vertices;
+    if (local_rank == 0) {
+      global_vertices =
+          new T[n_ranks * (nVertices * balancer->getDimension() + 1)];
+    }
 
-      // collect all works and vertices on a single process
-      MPI_Gather(local_vertices, nVertices * balancer->getDimension() + 1,
-                 MPIDataTypeT, global_vertices,
-                 nVertices * balancer->getDimension() + 1, MPIDataTypeT, 0,
-                 communicator);
-
-      if (local_rank == 0) {
-        auto vtkpoints = vtkSmartPointer<vtkPoints>::New();
-        auto unstructuredGrid = vtkSmartPointer<vtkUnstructuredGrid>::New();
-
-        // enter vertices into unstructured grid
-        for (int i = 0; i < n_ranks; ++i) {
-          for (int v = 0; v < nVertices; ++v) {
-            vtkpoints->InsertNextPoint(
-                global_vertices[i * (nVertices * balancer->getDimension() + 1) +
-                                v * balancer->getDimension() + 0],
-                global_vertices[i * (nVertices * balancer->getDimension() + 1) +
-                                v * balancer->getDimension() + 1],
-                global_vertices[i * (nVertices * balancer->getDimension() + 1) +
-                                v * balancer->getDimension() + 2]);
-          }
-        }
-        unstructuredGrid->SetPoints(vtkpoints);
-
-        // data arrays for work and cell id
-        auto work = vtkSmartPointer<vtkFloatArray>::New();
-        auto cell = vtkSmartPointer<vtkFloatArray>::New();
-        work->SetNumberOfComponents(1);
-        work->SetNumberOfTuples(n_ranks);
-        work->SetName("work");
-        cell->SetNumberOfComponents(1);
-        cell->SetNumberOfTuples(n_ranks);
-        cell->SetName("cell id");
-
-        for (int n = 0; n < n_ranks; ++n) {
-          // define grid points, i.e. vertices of local domain
-          vtkIdType pointIds[8] = {8 * n + 0, 8 * n + 1, 8 * n + 2, 8 * n + 3,
-                                   8 * n + 4, 8 * n + 5, 8 * n + 6, 8 * n + 7};
-
-          auto faces = vtkSmartPointer<vtkCellArray>::New();
-          // setup faces of polyhedron
-          vtkIdType f0[3] = {8 * n + 0, 8 * n + 2, 8 * n + 1};
-          vtkIdType f1[3] = {8 * n + 1, 8 * n + 2, 8 * n + 3};
-
-          vtkIdType f2[3] = {8 * n + 0, 8 * n + 4, 8 * n + 2};
-          vtkIdType f3[3] = {8 * n + 2, 8 * n + 4, 8 * n + 6};
-
-          vtkIdType f4[3] = {8 * n + 2, 8 * n + 6, 8 * n + 3};
-          vtkIdType f5[3] = {8 * n + 3, 8 * n + 6, 8 * n + 7};
-
-          vtkIdType f6[3] = {8 * n + 1, 8 * n + 5, 8 * n + 3};
-          vtkIdType f7[3] = {8 * n + 3, 8 * n + 5, 8 * n + 7};
-
-          vtkIdType f8[3] = {8 * n + 0, 8 * n + 4, 8 * n + 1};
-          vtkIdType f9[3] = {8 * n + 1, 8 * n + 4, 8 * n + 5};
-
-          vtkIdType fa[3] = {8 * n + 4, 8 * n + 6, 8 * n + 5};
-          vtkIdType fb[3] = {8 * n + 5, 8 * n + 6, 8 * n + 7};
-
-          faces->InsertNextCell(3, f0);
-          faces->InsertNextCell(3, f1);
-          faces->InsertNextCell(3, f2);
-          faces->InsertNextCell(3, f3);
-          faces->InsertNextCell(3, f4);
-          faces->InsertNextCell(3, f5);
-          faces->InsertNextCell(3, f6);
-          faces->InsertNextCell(3, f7);
-          faces->InsertNextCell(3, f8);
-          faces->InsertNextCell(3, f9);
-          faces->InsertNextCell(3, fa);
-          faces->InsertNextCell(3, fb);
-
-          unstructuredGrid->InsertNextCell(VTK_POLYHEDRON, 8, pointIds, 12,
-                                           faces->GetPointer());
-          work->SetValue(
-              n,
-              global_vertices[n * (nVertices * balancer->getDimension() + 1) +
-                              8 * balancer->getDimension()]);
-          cell->SetValue(n, (T)n);
+    // collect all works and vertices on a single process
+    MPI_Gather(local_vertices, nVertices * balancer->getDimension() + 1,
+               MPIDataTypeT, global_vertices,
+               nVertices * balancer->getDimension() + 1, MPIDataTypeT, 0,
+               communicator);
+
+    if (local_rank == 0) {
+      auto vtkpoints = vtkSmartPointer<vtkPoints>::New();
+      auto unstructuredGrid = vtkSmartPointer<vtkUnstructuredGrid>::New();
+
+      // enter vertices into unstructured grid
+      for (int i = 0; i < n_ranks; ++i) {
+        for (int v = 0; v < nVertices; ++v) {
+          vtkpoints->InsertNextPoint(
+              global_vertices[i * (nVertices * balancer->getDimension() + 1) +
+                              v * balancer->getDimension() + 0],
+              global_vertices[i * (nVertices * balancer->getDimension() + 1) +
+                              v * balancer->getDimension() + 1],
+              global_vertices[i * (nVertices * balancer->getDimension() + 1) +
+                              v * balancer->getDimension() + 2]);
         }
-        unstructuredGrid->GetCellData()->AddArray(work);
-        unstructuredGrid->GetCellData()->AddArray(cell);
-
-        if(!createDirectory("vtk_vertices")) throw FilesystemErrorException(
-            __FILE__, __func__, __LINE__,
-            "Could not create output directory.");
-        std::ostringstream filename;
-        filename << "vtk_vertices/ALL_vtk_vertices_" << std::setw(7)
-                 << std::setfill('0') << step << ".vtu";
-        auto writer = vtkSmartPointer<vtkXMLUnstructuredGridWriter>::New();
-        writer->SetInputData(unstructuredGrid);
-        writer->SetFileName(filename.str().c_str());
-        writer->SetDataModeToAscii();
-        // writer->SetDataModeToBinary();
-        writer->Write();
-
-        delete[] global_vertices;
       }
-
-    } catch (CustomException &e) {
-      std::cout << e.what() << std::endl;
-      MPI_Abort(MPI_COMM_WORLD, -1);
+      unstructuredGrid->SetPoints(vtkpoints);
+
+      // data arrays for work and cell id
+      auto work = vtkSmartPointer<vtkFloatArray>::New();
+      auto cell = vtkSmartPointer<vtkFloatArray>::New();
+      work->SetNumberOfComponents(1);
+      work->SetNumberOfTuples(n_ranks);
+      work->SetName("work");
+      cell->SetNumberOfComponents(1);
+      cell->SetNumberOfTuples(n_ranks);
+      cell->SetName("cell id");
+
+      for (int n = 0; n < n_ranks; ++n) {
+        // define grid points, i.e. vertices of local domain
+        vtkIdType pointIds[8] = {8 * n + 0, 8 * n + 1, 8 * n + 2, 8 * n + 3,
+                                 8 * n + 4, 8 * n + 5, 8 * n + 6, 8 * n + 7};
+
+        auto faces = vtkSmartPointer<vtkCellArray>::New();
+        // setup faces of polyhedron
+        vtkIdType f0[3] = {8 * n + 0, 8 * n + 2, 8 * n + 1};
+        vtkIdType f1[3] = {8 * n + 1, 8 * n + 2, 8 * n + 3};
+
+        vtkIdType f2[3] = {8 * n + 0, 8 * n + 4, 8 * n + 2};
+        vtkIdType f3[3] = {8 * n + 2, 8 * n + 4, 8 * n + 6};
+
+        vtkIdType f4[3] = {8 * n + 2, 8 * n + 6, 8 * n + 3};
+        vtkIdType f5[3] = {8 * n + 3, 8 * n + 6, 8 * n + 7};
+
+        vtkIdType f6[3] = {8 * n + 1, 8 * n + 5, 8 * n + 3};
+        vtkIdType f7[3] = {8 * n + 3, 8 * n + 5, 8 * n + 7};
+
+        vtkIdType f8[3] = {8 * n + 0, 8 * n + 4, 8 * n + 1};
+        vtkIdType f9[3] = {8 * n + 1, 8 * n + 4, 8 * n + 5};
+
+        vtkIdType fa[3] = {8 * n + 4, 8 * n + 6, 8 * n + 5};
+        vtkIdType fb[3] = {8 * n + 5, 8 * n + 6, 8 * n + 7};
+
+        faces->InsertNextCell(3, f0);
+        faces->InsertNextCell(3, f1);
+        faces->InsertNextCell(3, f2);
+        faces->InsertNextCell(3, f3);
+        faces->InsertNextCell(3, f4);
+        faces->InsertNextCell(3, f5);
+        faces->InsertNextCell(3, f6);
+        faces->InsertNextCell(3, f7);
+        faces->InsertNextCell(3, f8);
+        faces->InsertNextCell(3, f9);
+        faces->InsertNextCell(3, fa);
+        faces->InsertNextCell(3, fb);
+
+        unstructuredGrid->InsertNextCell(VTK_POLYHEDRON, 8, pointIds, 12,
+                                         faces->GetPointer());
+        work->SetValue(
+            n,
+            global_vertices[n * (nVertices * balancer->getDimension() + 1) +
+                            8 * balancer->getDimension()]);
+        cell->SetValue(n, (T)n);
+      }
+      unstructuredGrid->GetCellData()->AddArray(work);
+      unstructuredGrid->GetCellData()->AddArray(cell);
+
+      createDirectory("vtk_vertices");
+      std::ostringstream filename;
+      filename << "vtk_vertices/ALL_vtk_vertices_" << std::setw(7)
+               << std::setfill('0') << step << ".vtu";
+      auto writer = vtkSmartPointer<vtkXMLUnstructuredGridWriter>::New();
+      writer->SetInputData(unstructuredGrid);
+      writer->SetFileName(filename.str().c_str());
+      writer->SetDataModeToAscii();
+      // writer->SetDataModeToBinary();
+      writer->Write();
+
+      delete[] global_vertices;
     }
   }
 #endif
@@ -873,15 +816,17 @@ private:
   }
 
   /// create directory if it does not already exist
-  /// @result int 0 on success 1 if directory could not be created
-  int createDirectory(const char *filename) {
+  ///
+  /// May throw FilesystemErrorException
+  void createDirectory(const char *filename) {
     errno = 0;
     mode_t mode = S_IRWXU | S_IRWXG; // rwx for user and group
-    if(!mkdir(filename, mode)) {
-      if(errno == EEXIST) return 0;
-      return 1;
+    if(mkdir(filename, mode)) {
+      if(errno == EEXIST) return;
+      throw FilesystemErrorException(
+          __FILE__, __func__, __LINE__,
+          "Could not create output directory.");
     }
-    return 0;
   }
 
 
diff --git a/include/ALL_Unstructured.hpp b/include/ALL_ForceBased.hpp
similarity index 97%
rename from include/ALL_Unstructured.hpp
rename to include/ALL_ForceBased.hpp
index 08dcec5cca02cd0def570770f5ba9ac52a825e70..6ebbc3bea69a610b739f9ad0d81a8d4cceba667a 100644
--- a/include/ALL_Unstructured.hpp
+++ b/include/ALL_ForceBased.hpp
@@ -28,14 +28,14 @@ ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
 
-#ifndef ALL_UNSTRUCTURED_HEADER_INCLUDED
-#define ALL_UNSTRUCTURED_HEADER_INCLUDED
+#ifndef ALL_FORCEBASED_HEADER_INCLUDED
+#define ALL_FORCEBASED_HEADER_INCLUDED
 
 #define ALL_FORCE_OLD
 
 /*
 
-    Unstructured load-balancing scheme:
+    ForceBased load-balancing scheme:
 
     Requirements:
         a) cartesian communicator
@@ -82,20 +82,20 @@ namespace ALL {
 /// conserved throughout the load-balancing process.
 /// @tparam T data for vertices and related data
 /// @tparam W data for work and related data
-template <class T, class W> class Unstructured_LB : public LB<T, W> {
+template <class T, class W> class ForceBased_LB : public LB<T, W> {
 public:
   /// default constructor
-  Unstructured_LB() {}
+  ForceBased_LB() {}
   /// constructor initializing basic parameters
   /// @param[in] d the dimension of the used vertices
   /// @param[in] w the scalar work assigned to the local domain
   /// @param[in] g the correction factor gamma
-  Unstructured_LB(int d, W w, T g) : LB<T, W>(d, g) {
+  ForceBased_LB(int d, W w, T g) : LB<T, W>(d, g) {
     this->setWork(w);
   }
 
   /// default destructor
-  ~Unstructured_LB() override;
+  ~ForceBased_LB() override;
 
   /// setup internal data structures and parameters
   void setup() override;
@@ -110,7 +110,7 @@ public:
   /// method to provide a list of the neighbors of the local domain
   /// @param[out] list reference to a std::vector of integers where the list of
   /// neighbors will be assigned to
-  virtual void getNeighbors(std::vector<int> &list) override;
+  virtual std::vector<int> &getNeighbors() override;
 
   /// method to set specific data structures (unused for tensor grid method)
   /// @param[in] data pointer to the data structure
@@ -145,15 +145,15 @@ private:
   int sec_dim[2];
 };
 
-template <class T, class W> Unstructured_LB<T, W>::~Unstructured_LB() {}
+template <class T, class W> ForceBased_LB<T, W>::~ForceBased_LB() {}
 
 // provide list of neighbors
 template <class T, class W>
-void Unstructured_LB<T, W>::getNeighbors(std::vector<int> &ret) {
-  ret = neighbors;
+std::vector<int> &ForceBased_LB<T, W>::getNeighbors() {
+  return neighbors;
 }
 
-template <class T, class W> void Unstructured_LB<T, W>::setup() {
+template <class T, class W> void ForceBased_LB<T, W>::setup() {
   n_vertices = this->vertices.size();
   vertex_neighbors.resize(n_vertices * 8);
 
@@ -277,7 +277,7 @@ template <class T, class W> void Unstructured_LB<T, W>::setup() {
 #ifdef ALL_DEBUG_ENABLED
   MPI_Barrier(this->globalComm);
   if (this->localRank == 0)
-    std::cout << "ALL::Unstructured_LB<T,W>::setup() preparing communicators..."
+    std::cout << "ALL::ForceBased_LB<T,W>::setup() preparing communicators..."
               << std::endl;
 #endif
   std::vector<int> dim_vert(this->global_dims);
@@ -288,7 +288,7 @@ template <class T, class W> void Unstructured_LB<T, W>::setup() {
 #ifdef ALL_DEBUG_ENABLED
   MPI_Barrier(this->globalComm);
   if (this->localRank == 0)
-    std::cout << "ALL::Unstructured_LB<T,W>::setup() computing communicators..."
+    std::cout << "ALL::ForceBased_LB<T,W>::setup() computing communicators..."
               << std::endl;
   std::cout << "DEBUG: "
             << " rank: " << this->localRank << " dim_vert: " << dim_vert.at(0)
@@ -370,7 +370,7 @@ template <class T, class W> void Unstructured_LB<T, W>::setup() {
 #ifdef ALL_DEBUG_ENABLED
     MPI_Barrier(this->globalComm);
     if (this->localRank == 0)
-      std::cout << "ALL::Unstructured_LB<T,W>::setup() finished computing "
+      std::cout << "ALL::ForceBased_LB<T,W>::setup() finished computing "
                    "communicators..."
                 << std::endl;
 #endif
@@ -381,7 +381,7 @@ template <class T, class W> void Unstructured_LB<T, W>::setup() {
 //       for now: do not change the vertices along the edge of the system
 
 template <class T, class W>
-void Unstructured_LB<T, W>::balance(int /*step*/) {
+void ForceBased_LB<T, W>::balance(int /*step*/) {
 
   this->prevVertices = this->vertices;
 
diff --git a/include/ALL_Histogram.hpp b/include/ALL_Histogram.hpp
index 3f26614ecb473baae2d5dfcad894d283147a0a77..f7c93f914f0db9c621821003324c3362775f3389 100644
--- a/include/ALL_Histogram.hpp
+++ b/include/ALL_Histogram.hpp
@@ -92,7 +92,7 @@ public:
   // neighbors
   /// method to provide a list of neighbors by MPI rank
   /// @param [out] list the std::vector the list of neighbors is stored to
-  void getNeighbors(std::vector<int> &list) override;
+  std::vector<int> &getNeighbors() override;
 
   /// method to set method specific data
   /// @param data pointer to an array of integers (int) containing the number of
@@ -666,8 +666,8 @@ template <class T, class W> void Histogram_LB<T, W>::find_neighbors() {
 
 // provide list of neighbors
 template <class T, class W>
-void Histogram_LB<T, W>::getNeighbors(std::vector<int> &ret) {
-  ret = neighbors;
+std::vector<int> &Histogram_LB<T, W>::getNeighbors() {
+  return neighbors;
 }
 
 }//namespace ALL
diff --git a/include/ALL_LB.hpp b/include/ALL_LB.hpp
index 33ac7010e96c875960faa74d28d296ed2bd02ffa..b2b36c8ee0fda555e3992028f94605712b511198 100644
--- a/include/ALL_LB.hpp
+++ b/include/ALL_LB.hpp
@@ -51,6 +51,7 @@ public:
     periodicity.resize(dim);
     local_coords.resize(dim);
     global_dims.resize(dim);
+    neighborVertices.resize(0);
   }
 
   /// destructor
@@ -63,9 +64,9 @@ public:
   virtual void balance(const int step) = 0;
 
   /// abstract definition of the method to get the neighbors of the local domain
-  /// @param[out] list reference to std::vector<int> to store the MPI ranks of
-  /// the neighbors to
-  virtual void getNeighbors(std::vector<int> &list) = 0;
+  /// @result std::vector<int> to store the MPI ranks of the neighbors
+  /// to
+  virtual std::vector<int> &getNeighbors() = 0;
 
   /// method to update the vertices used for the balancing step, overwrites old
   /// set of vertices
@@ -162,6 +163,12 @@ public:
   /// @param[in] data the data be passed to the balancing method
   virtual void setAdditionalData(const void *data) = 0;
 
+  /// method to provide a list of vertices describing the neighboring domains
+  /// currently only implemented for VORONOI, as a means to get the anchor points
+  /// of the surrounding domains
+  std::vector<T> &getNeighborVertices() {
+    return neighborVertices; }
+
 protected:
   /// correction factor
   T gamma;
@@ -188,9 +195,12 @@ protected:
   std::vector<int> local_coords;
   /// periodicity of the MPI communicator / system
   std::vector<int> periodicity;
+  /// vertices describing neighboring domains
+  std::vector<T> neighborVertices;
   /// method the resize the vertex list
   /// @param [in] new_size the new size of the vertex list
   void resizeVertices(const int newSize) { vertices.resize(newSize); }
+
 };
 
 }//namespace ALL
diff --git a/include/ALL_Staggered.hpp b/include/ALL_Staggered.hpp
index e1c125e121b1177bbec48615616c508d871e6495..ea364821cf000aa2e4c5c49b6165fb2db77c66ca 100644
--- a/include/ALL_Staggered.hpp
+++ b/include/ALL_Staggered.hpp
@@ -85,7 +85,7 @@ public:
   /// method to provide a list of the neighbors of the local domain
   /// @param[out] list reference to a std::vector of integers where the list of
   /// neighbors will be assigned to
-  virtual void getNeighbors(std::vector<int> &list) override;
+  virtual std::vector<int> &getNeighbors() override;
 
   /// method to set specific data structures (unused for staggered grid method)
   /// @param[in] data pointer to the data structure
@@ -619,8 +619,8 @@ template <class T, class W> void Staggered_LB<T, W>::find_neighbors() {
 
 // provide list of neighbors
 template <class T, class W>
-void Staggered_LB<T, W>::getNeighbors(std::vector<int> &ret) {
-  ret = neighbors;
+std::vector<int> &Staggered_LB<T, W>::getNeighbors() {
+  return neighbors;
 }
 
 }//namespace ALL
diff --git a/include/ALL_Tensor.hpp b/include/ALL_Tensor.hpp
index b4708321fbc4498ed0b4f754e13426f330370e29..c5ca109f2e0195012027787eb2d5f4a00ab8eae2 100644
--- a/include/ALL_Tensor.hpp
+++ b/include/ALL_Tensor.hpp
@@ -86,7 +86,7 @@ public:
   /// method to provide a list of the neighbors of the local domain
   /// @param[out] list reference to a std::vector of integers where the list of
   /// neighbors will be assigned to
-  virtual void getNeighbors(std::vector<int> &list) override;
+  virtual std::vector<int> &getNeighbors() override;
 
   /// method to set specific data structures (unused for tensor grid method)
   /// @param[in] data pointer to the data structure
@@ -247,8 +247,8 @@ template <class T, class W> void Tensor_LB<T, W>::balance(int) {
 
 // provide list of neighbors
 template <class T, class W>
-void Tensor_LB<T, W>::getNeighbors(std::vector<int> &ret) {
-  ret = neighbors;
+std::vector<int> &Tensor_LB<T, W>::getNeighbors() {
+  return neighbors;
 }
 
 }//namespace ALL
diff --git a/include/ALL_Voronoi.hpp b/include/ALL_Voronoi.hpp
index d9abb94d39f0acc13272669dc777403aefad97f2..8cc2bdc4020b50e84394d5a140fadcb2838e4657 100644
--- a/include/ALL_Voronoi.hpp
+++ b/include/ALL_Voronoi.hpp
@@ -94,7 +94,7 @@ public:
   /// method to provide a list of the neighbors of the local domain
   /// @param[out] list reference to a std::vector of integers where the list of
   /// neighbors will be assigned to
-  virtual void getNeighbors(std::vector<int> &list) override;
+  virtual std::vector<int> &getNeighbors() override;
 
   /// method to set specific data structures (unused for tensor grid method)
   /// @param[in] data pointer to the data structure
@@ -102,7 +102,7 @@ public:
 
   /// method to get a list of the vertices of the neighboring domains
   /// @param[out] ret list of vertices of neighboring domains
-  void getNeighborVertices(std::vector<T> &ret);
+  std::vector<T> &getNeighborVertices();
 
 private:
   // MPI values
@@ -115,9 +115,6 @@ private:
   std::vector<int> neighbors;
   int nNeighbors[1];
 
-  // generator points of the neighbor cells
-  std::vector<T> neighbor_vertices;
-
   // collection of generator points
   std::vector<T> generator_points;
 
@@ -540,27 +537,22 @@ void Voronoi_LB<T, W>::balance(int known_unused loadbalancing_step) {
   nNeighbors[0] = neighbors.size();
 
   // clear old neighbor vertices
-  neighbor_vertices.clear();
+  this->neighborVertices.clear();
 
   // find vertices of neighbors
 
   for (auto n : neighbors) {
     for (int i = 0; i < dimension; ++i)
-      neighbor_vertices.push_back(generator_points.at(info_length * n + i));
+      this->neighborVertices.push_back(generator_points.at(info_length * n + i));
   }
 }
 
 // provide list of neighbors
 template <class T, class W>
-void Voronoi_LB<T, W>::getNeighbors(std::vector<int> &ret) {
-  ret = neighbors;
+std::vector<int> &Voronoi_LB<T, W>::getNeighbors() {
+  return neighbors;
 }
 
-// provide list of neighbor vertices
-template <class T, class W>
-void Voronoi_LB<T, W>::getNeighborVertices(std::vector<T> &ret) {
-  ret = neighbor_vertices;
-}
 
 }//namespace ALL
 
diff --git a/src/ALL_fortran.cpp b/src/ALL_fortran.cpp
index 17708b732b4dcc9bfd405fe3750d6d189ca980d1..36f46c31d0e622db2dbfd88fba1883cf7879e3bc 100644
--- a/src/ALL_fortran.cpp
+++ b/src/ALL_fortran.cpp
@@ -39,11 +39,18 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 static int ALL_errno;
 static const char* ALL_errdesc;
 
+typedef ALL::ALL<double, double> ALL_t;
+
+#ifdef ALL_FORTRAN_ERROR_ABORT
+#define ALL_try
+#define ALL_catch
+#else
 #define ALL_try try {
 #define ALL_catch } catch (ALL::CustomException &e) {  \
   ALL_errno = e.get_error_id();                   \
   ALL_errdesc = e.what();                         \
 }
+#endif
 
 /*
 This interface is unstable and subject to change! It is only used for the
@@ -54,7 +61,7 @@ the C++ API or Fortran interfaces
 extern "C" {
 // wrapper to create a ALL::ALL<double,double> object (should be the
 // most commonly used one)
-ALL::ALL<double, double> *all_init_c(ALL::LB_t method, const int dim, double gamma) {
+ALL_t *all_init_c(ALL::LB_t method, const int dim, double gamma) {
   ALL_try
   return new ALL::ALL<double, double>(method, dim, gamma);
   ALL_catch
@@ -62,14 +69,19 @@ ALL::ALL<double, double> *all_init_c(ALL::LB_t method, const int dim, double gam
 }
 
 // delete ALL object
-void all_finalize_c(ALL::ALL<double, double> *all_obj) {
+void all_finalize_c(ALL_t *all_obj) {
   delete all_obj;
   // todo(s.schulz): does this throw if all_obj is already deleted? can we detect it?
 }
 
+void all_set_gamma_c(ALL_t *all_obj, double gamma) {
+  ALL_try
+  all_obj->setGamma(gamma);
+  ALL_catch
+}
 
 // set grid parameters
-void all_set_proc_grid_params_c(ALL::ALL<double, double> *all_obj, int nloc,
+void all_set_proc_grid_params_c(ALL_t *all_obj, int nloc,
                                 int *loc, int nsize, int *size) {
   ALL_try
   int dim = all_obj->getDimension();
@@ -88,7 +100,7 @@ void all_set_proc_grid_params_c(ALL::ALL<double, double> *all_obj, int nloc,
 }
 
 // wrapper to set the minimum domain size
-void all_set_min_domain_size_c(ALL::ALL<double, double> *all_obj, int dim,
+void all_set_min_domain_size_c(ALL_t *all_obj, int dim,
                                double *domain_size) {
   ALL_try
   if (all_obj->getDimension() != dim)
@@ -101,14 +113,22 @@ void all_set_min_domain_size_c(ALL::ALL<double, double> *all_obj, int dim,
 }
 
 // wrapper to set the work (scalar only for the moment)
-void all_set_work_c(ALL::ALL<double, double> *all_obj, double work) {
+void all_set_work_c(ALL_t *all_obj, double work) {
   ALL_try
   all_obj->setWork(work);
   ALL_catch
 }
 
+// set multidimensional work
+void all_set_work_multi_c(ALL_t *all_obj, double *work, int dim) {
+  ALL_try
+  std::vector<double> t_work(work, work+dim);
+  all_obj->setWork(t_work);
+  ALL_catch
+}
+
 // wrapper to set the vertices (using an array of double values and dimension)
-void all_set_vertices_c(ALL::ALL<double, double> *all_obj, const int n,
+void all_set_vertices_c(ALL_t *all_obj, const int n,
                         const int dim, const double *vertices) {
   ALL_try
   std::vector<ALL::Point<double>> points(n, ALL::Point<double>(dim));
@@ -128,39 +148,50 @@ void all_set_vertices_c(ALL::ALL<double, double> *all_obj, const int n,
 }
 
 // wrapper to set the communicator
-void all_set_communicator_c(ALL::ALL<double, double> *all_obj, MPI_Fint fcomm) {
+void all_set_communicator_c(ALL_t *all_obj, MPI_Fint fcomm) {
   ALL_try
   MPI_Comm cComm = MPI_Comm_f2c(fcomm);
   all_obj->setCommunicator(cComm);
   ALL_catch
 }
 
+void all_set_sys_size_c(ALL_t *all_obj, double *size, int dim) {
+  ALL_try
+  std::vector<double> t_size(size, size+dim);
+  all_obj->setSysSize(t_size);
+  ALL_catch
+}
+
 // wrapper to setup routine
-void all_setup_c(ALL::ALL<double, double> *all_obj) {
+void all_setup_c(ALL_t *all_obj) {
   ALL_try
   all_obj->setup();
   ALL_catch
 }
 
+void all_set_method_data_histogram_c(ALL_t *all_obj, int *nbins) { all_obj->setMethodData(nbins); }
+
 // wrapper to call balance routine
-void all_balance_c(ALL::ALL<double, double> *all_obj) {
+void all_balance_c(ALL_t *all_obj) {
   ALL_try
   all_obj->balance();
   ALL_catch
 }
 
+void all_get_gamma_c(ALL_t *all_obj, double *gamma) { *gamma =  all_obj->getGamma(); }
+
 // wrapper to get number of new vertices
-void all_get_new_n_vertices_c(ALL::ALL<double, double> *all_obj, int *n_vertices) {
+void all_get_number_of_vertices_c(ALL_t *all_obj, int *n_vertices) {
   ALL_try
-  *n_vertices = (all_obj->getResultVertices()).size();
+  *n_vertices = (all_obj->getVertices()).size();
   ALL_catch
 }
 
 // wrapper to return new vertices
-void all_get_result_vertices_c(ALL::ALL<double, double> *all_obj, int n_vertices,
+void all_get_vertices_c(ALL_t *all_obj, int n_vertices,
                                double *vertices) {
   ALL_try
-  std::vector<ALL::Point<double>> tmp_vertices = all_obj->getResultVertices();
+  std::vector<ALL::Point<double>> tmp_vertices = all_obj->getVertices();
   int dimension = all_obj->getDimension();
   assert(n_vertices = tmp_vertices.size());
   for (int i = 0; i < n_vertices; ++i) {
@@ -172,14 +203,14 @@ void all_get_result_vertices_c(ALL::ALL<double, double> *all_obj, int n_vertices
 }
 
 // set process tag
-void all_set_proc_tag_c(ALL::ALL<double, double> *all_obj, int tag) {
+void all_set_proc_tag_c(ALL_t *all_obj, int tag) {
   ALL_try
   all_obj->setProcTag(tag);
   ALL_catch
 }
 
 // wrapper to return new vertices
-void all_get_prev_vertices_c(ALL::ALL<double, double> *all_obj, int n_vertices,
+void all_get_prev_vertices_c(ALL_t *all_obj, int n_vertices,
                              double *prevVertices) {
   ALL_try
   std::vector<ALL::Point<double>> tmp_vertices = all_obj->getPrevVertices();
@@ -194,22 +225,67 @@ void all_get_prev_vertices_c(ALL::ALL<double, double> *all_obj, int n_vertices,
 }
 
 // get currently set dimension
-void all_get_dimension_c(ALL::ALL<double, double> *all_obj, int *dim) {
+void all_get_dimension_c(ALL_t *all_obj, int *dim) {
   ALL_try
   *dim = all_obj->getDimension();
   ALL_catch
 }
 
+void all_get_length_of_work_c(ALL_t *all_obj, int *length) {
+  ALL_try
+  std::vector<double> work;
+  all_obj->getWork(work);
+  *length = work.size();
+  ALL_catch
+}
+
+void all_get_work_c(ALL_t *all_obj, double *work) {
+  ALL_try
+  *work = all_obj->getWork();
+  ALL_catch
+}
+
+void all_get_work_array_c(ALL_t *all_obj, double *work, int length) {
+  ALL_try
+  std::vector<double> all_work;
+  all_obj->getWork(all_work);
+  assert((int)all_work.size() == length);
+  memcpy(work,&all_work[0],length*sizeof(*work));
+  ALL_catch
+}
+
+void all_get_number_of_neighbors_c(ALL_t *all_obj, int *count) {
+  ALL_try
+  std::vector<int> neighbors = all_obj->getNeighbors();
+  *count = neighbors.size();
+  ALL_catch
+}
+
+void all_get_neighbors_c(ALL_t *all_obj, int *neighbors, int count) {
+  ALL_try
+  std::vector<int> all_neighbors = all_obj->getNeighbors();
+  assert((int)all_neighbors.size() == count);
+  memcpy(neighbors,&all_neighbors[0],count*sizeof(*neighbors));
+  ALL_catch
+}
+
+#ifdef ALL_VTK_OUTPUT
 // print VTK outlines
-void all_print_vtk_outlines_c(ALL::ALL<double, double> * all_obj known_unused, int known_unused step) {
+void all_print_vtk_outlines_c(ALL_t * all_obj known_unused, int known_unused step) {
   // todo(s.schulz): Should this function even be declared if ALL_VTK_OUTPUT isn't?
-#ifdef ALL_VTK_OUTPUT
   ALL_try
   all_obj->printVTKoutlines(step);
   ALL_catch
-#endif
 }
 
+void all_print_vtk_vertices_c(ALL_t * all_obj known_unused, int known_unused step) {
+  // todo(s.schulz): Should this function even be declared if ALL_VTK_OUTPUT isn't?
+  ALL_try
+  all_obj->printVTKvertices(step);
+  ALL_catch
+}
+#endif
+
 // retrieve error number
 int all_errno_c(void) { return ALL_errno; }
 
diff --git a/src/ALL_module.F90 b/src/ALL_module.F90
index df661de9b3f3bc39e2a88c192506ca0915d61a0c..ad9dcfd14937b5065ece390acbc6b176960de49f 100644
--- a/src/ALL_module.F90
+++ b/src/ALL_module.F90
@@ -42,7 +42,7 @@ module ALL_module
     ! The different loadbalancing methods. Must be kept in sync with C side.
     integer(c_int), public, parameter :: ALL_STAGGERED = 0
     integer(c_int), public, parameter :: ALL_TENSOR = 1
-    integer(c_int), public, parameter :: ALL_UNSTRUCTURED = 2
+    integer(c_int), public, parameter :: ALL_FORCEBASED = 2
 #ifdef ALL_VORONOI_ACTIVE
     integer(c_int), public, parameter :: ALL_VORONOI = 3
 #endif
@@ -61,18 +61,23 @@ module ALL_module
     ! Direct interface with C wrapper
     ! TODO add intent() to dummy arguments
     interface
-        function ALL_init_c(method, dim, gamma) result(this) bind(C)
+        function all_init_c(method, dim, gamma) result(this) bind(C)
             use iso_c_binding
             integer(c_int), value :: method
             integer(c_int), value :: dim
             real(c_double), value :: gamma
             type(c_ptr) :: this
         end function
-        subroutine ALL_finalize_c(obj) bind(C)
+        subroutine all_finalize_c(obj) bind(C)
             use iso_c_binding
             type(c_ptr), value :: obj
         end subroutine
-        subroutine ALL_set_proc_grid_params_c(obj, nloc, loc, nsize, size) bind(C)
+        subroutine all_set_gamma_c(obj, gamma) bind(C)
+            use iso_c_binding
+            type(c_ptr), value :: obj
+            real(c_double), value :: gamma
+        end subroutine
+        subroutine all_set_proc_grid_params_c(obj, nloc, loc, nsize, size) bind(C)
             use iso_c_binding
             type(c_ptr), value :: obj
             integer(c_int), value :: nloc
@@ -96,6 +101,12 @@ module ALL_module
             type(c_ptr), value :: obj
             real(c_double), value :: work
         end subroutine
+        subroutine all_set_work_multi_c(obj, work, dim) bind(C)
+            use iso_c_binding
+            type(c_ptr), value :: obj
+            integer(c_int), value :: dim
+            real(c_double), dimension(dim) :: work
+        end subroutine
         subroutine all_set_vertices_c(obj, n, dim, vertices) bind(C)
             use iso_c_binding
             type(c_ptr), value :: obj
@@ -108,20 +119,36 @@ module ALL_module
             type(c_ptr), value :: obj
             integer, value :: comm
         end subroutine
-        subroutine ALL_setup_c(obj) bind(C)
+        subroutine all_set_sys_size_c(obj, size, dim) bind(C)
+            use iso_c_binding
+            type(c_ptr), value :: obj
+            integer(c_int), value :: dim
+            real(c_double), dimension(dim) :: size
+        end subroutine
+        subroutine all_set_method_data_histogram_c(obj, nbins) bind(C)
+            use iso_c_binding
+            type(c_ptr), value :: obj
+            integer(c_int), dimension(3) :: nbins
+        end subroutine
+        subroutine all_setup_c(obj) bind(C)
             use iso_c_binding
             type(c_ptr), value :: obj
         end subroutine
-        subroutine ALL_balance_c(obj) bind(C)
+        subroutine all_balance_c(obj) bind(C)
             use iso_c_binding
             type(c_ptr), value :: obj
         end subroutine
-        subroutine all_get_new_n_vertices_c(obj, n) bind(C)
+        subroutine all_get_gamma_c(obj, gamma) bind(C)
+            use iso_c_binding
+            type(c_ptr), value :: obj
+            real(c_double) :: gamma
+        end subroutine
+        subroutine all_get_number_of_vertices_c(obj, n) bind(C)
             use iso_c_binding
             type(c_ptr), value :: obj
             integer(c_int) :: n
         end subroutine
-        subroutine all_get_result_vertices_c(obj, n, vertices) bind(C)
+        subroutine all_get_vertices_c(obj, n, vertices) bind(C)
             use iso_c_binding
             type(c_ptr), value :: obj
             integer(c_int), value :: n
@@ -133,16 +160,50 @@ module ALL_module
             integer(c_int), value :: n
             real(c_double), dimension(*) :: vertices
         end subroutine
-        subroutine ALL_get_dimension_c(obj,dim) bind(C)
+        subroutine all_get_dimension_c(obj,dim) bind(C)
             use iso_c_binding
             type(c_ptr), value :: obj
             integer(c_int) :: dim
         end subroutine
-        subroutine ALL_print_vtk_outlines_c(obj, step) bind(C)
+        subroutine all_get_length_of_work_c(obj, length) bind(C)
+            use iso_c_binding
+            type(c_ptr), value :: obj
+            integer(c_int) :: length
+        end subroutine
+        subroutine all_get_work_c(obj, work) bind(C)
+            use iso_c_binding
+            type(c_ptr), value :: obj
+            real(c_double) :: work
+        end subroutine
+        subroutine all_get_work_array_c(obj, work, length) bind(C)
+            use iso_c_binding
+            type(c_ptr), value :: obj
+            integer(c_int), value :: length
+            real(c_double), dimension(length) :: work
+        end subroutine
+        subroutine all_get_number_of_neighbors_c(obj, count) bind(C)
+            use iso_c_binding
+            type(c_ptr), value :: obj
+            integer(c_int) :: count
+        end subroutine
+        subroutine all_get_neighbors_c(obj, neighbors, count) bind(C)
+            use iso_c_binding
+            type(c_ptr), value :: obj
+            integer(c_int), value :: count
+            integer(c_int), dimension(count) :: neighbors
+        end subroutine
+#ifdef ALL_VTK_OUTPUT
+        subroutine all_print_vtk_outlines_c(obj, step) bind(C)
             use iso_c_binding
             type(c_ptr), value :: obj
             integer(c_int), value :: step
         end subroutine
+        subroutine all_print_vtk_vertices_c(obj, step) bind(C)
+            use iso_c_binding
+            type(c_ptr), value :: obj
+            integer(c_int), value :: step
+        end subroutine
+#endif
         function all_errno_c() result(errno) bind(C)
             use iso_c_binding
             integer(c_int) :: errno
@@ -163,43 +224,69 @@ module ALL_module
     contains
         procedure :: init => ALL_init
         procedure :: finalize => ALL_finalize
+        procedure :: set_gamma => ALL_set_gamma
         procedure :: set_proc_grid_params => ALL_set_proc_grid_params
         procedure :: set_proc_tag => ALL_set_proc_tag
         procedure :: set_min_domain_size => ALL_set_min_domain_size
         procedure :: set_work => ALL_set_work
+        procedure :: set_work_multi => ALL_set_work_multi
         procedure :: set_vertices => ALL_set_vertices
 #ifdef ALL_USE_F08
         procedure :: set_communicator => ALL_set_communicator_f08
 #else
         procedure :: set_communicator => ALL_set_communicator_int
 #endif
+        procedure :: set_sys_size => ALL_set_sys_size
+        procedure :: set_method_data_histgram => ALL_set_method_data_histgram
         procedure :: setup => ALL_setup
         procedure :: balance => ALL_balance
-        procedure :: get_new_n_vertices => ALL_get_new_n_vertices
-        procedure :: get_result_vertices => ALL_get_result_vertices
-        procedure :: get_result_vertices_alloc => ALL_get_result_vertices_alloc
+        procedure :: get_gamma => ALL_get_gamma
+        procedure :: get_number_of_vertices => ALL_get_number_of_vertices
+        procedure :: get_vertices => ALL_get_vertices
+        procedure :: get_vertices_alloc => ALL_get_vertices_alloc
         procedure :: get_prev_vertices => ALL_get_prev_vertices
         procedure :: get_dimension => ALL_get_dimension
+        procedure :: get_length_of_work => ALL_get_length_of_work
+        procedure :: get_work => ALL_get_work
+        procedure :: get_work_array => ALL_get_work_array
+        procedure :: get_number_of_neighbors => ALL_get_number_of_neighbors
+        procedure :: get_neighbors => ALL_get_neighbors
+#ifdef ALL_VTK_OUTPUT
         procedure :: print_vtk_outlines => ALL_print_vtk_outlines
+        procedure :: print_vtk_vertices => ALL_print_vtk_vertices
+#endif
     end type
 
     ! This is the old, but still supported API of separate functions
     public :: ALL_init
     public :: ALL_finalize
+    public :: ALL_set_gamma
     public :: ALL_set_proc_grid_params
     public :: ALL_set_proc_tag
     public :: ALL_set_min_domain_size
     public :: ALL_set_work
+    public :: ALL_set_work_multi
     public :: ALL_set_vertices
     public :: ALL_set_communicator
+    public :: ALL_set_sys_size
+    public :: ALL_set_method_data_histgram
     public :: ALL_setup
     public :: ALL_balance
-    public :: ALL_get_new_n_vertices
-    public :: ALL_get_result_vertices
-    public :: ALL_get_result_vertices_alloc
+    public :: ALL_get_gamma
+    public :: ALL_get_number_of_vertices
+    public :: ALL_get_vertices
+    public :: ALL_get_vertices_alloc
     public :: ALL_get_prev_vertices
     public :: ALL_get_dimension
+    public :: ALL_get_length_of_work
+    public :: ALL_get_work
+    public :: ALL_get_work_array
+    public :: ALL_get_number_of_neighbors
+    public :: ALL_get_neighbors
+#ifdef ALL_VTK_OUTPUT
     public :: ALL_print_vtk_outlines
+    public :: ALL_print_vtk_vertices
+#endif
     public :: ALL_error
     public :: ALL_reset_error
     public :: ALL_error_description
@@ -217,20 +304,26 @@ contains
         integer, intent(in) :: method !< Must be one of the ALL_* method values
         integer, intent(in) :: dim !< dimensionality of system
         real(c_double), intent(in) :: gamma !< gamma value for load balancer (ignored for TENSOR and STAGGERED)
-        this%object = ALL_init_c(method, dim, gamma)
+        this%object = all_init_c(method, dim, gamma)
         this%dim = dim
     end subroutine
     !> Delete the loadbalance object
     subroutine ALL_finalize(this)
         class(ALL_t), intent(in) :: this
-        call ALL_finalize_c(this%object)
+        call all_finalize_c(this%object)
+    end subroutine
+    !> Set gamma value for load balancer
+    subroutine ALL_set_gamma(this, gamma)
+        class(ALL_t), intent(in) :: this
+        real(c_double), intent(in) :: gamma
+        call all_set_gamma_c(this%object, gamma)
     end subroutine
     !> Set the grid parameters for this process
     subroutine ALL_set_proc_grid_params(this, loc, ranks)
         class(ALL_t), intent(in) :: this
         integer, dimension(this%dim), intent(in) :: loc !< index of domain in `dim` directions (0-indexed!)
         integer, dimension(this%dim), intent(in) :: ranks !< total number of domains in `dim` directions
-        call ALL_set_proc_grid_params_c(this%object, this%dim, loc, this%dim, ranks)
+        call all_set_proc_grid_params_c(this%object, this%dim, loc, this%dim, ranks)
     end subroutine
     !> Set process identifying tag for output
     subroutine ALL_set_proc_tag(this, tag)
@@ -250,6 +343,18 @@ contains
         real(c_double), intent(in) :: work !< work of this domain
         call all_set_work_c(this%object, work)
     end subroutine
+    !> Set multi dimensional work of this process
+    subroutine ALL_set_work_multi(this, work)
+        class(ALL_t), intent(in) :: this
+        real(c_double), dimension(:), intent(in) :: work !< multi dimensional work of this domain
+#ifndef NDEBUG
+        if(size(work) /= this%dim) then
+            write(error_unit, '(a)') "ALL_set_work_multi: Wrong dimension for work"
+            stop
+        endif
+#endif
+        call all_set_work_multi_c(this%object, work, size(work))
+    end subroutine
     !> Set new vertices
     !!
     !! @todo write interface for single rank array of vertices
@@ -258,7 +363,7 @@ contains
         real(c_double), dimension(:,:), intent(in) :: vertices !< vertices of domain, for `n` domains must have shape: (dim,n)
 #ifndef NDEBUG
         if(size(vertices,1) /= this%dim) then
-            write(error_unit,'(a)') "ALL_setVertices: Wrong dimension for vertices"
+            write(error_unit,'(a)') "ALL_set_vertices: Wrong dimension for vertices"
             stop
         endif
 #endif
@@ -279,49 +384,73 @@ contains
         integer, intent(in) :: comm !< MPI Communicator, not type checked!
         call all_set_communicator_c(this%object, comm)
     end subroutine
+    !> Set size of system, which is required for the histogram method
+    subroutine ALL_set_sys_size(this, syssize)
+        class(ALL_t), intent(in) :: this
+        real(c_double), dimension(:), intent(in) :: syssize !< system size
+#ifndef NDEBUG
+        if(size(syssize) /= this%dim) then
+            write(error_unit, '(a)') "ALL_set_size: Wrong dimension for size"
+            stop
+        endif
+#endif
+        call all_set_sys_size_c(this%object, syssize, size(syssize))
+    end subroutine
+    !> Set number of bins for histogram method
+    subroutine ALL_set_method_data_histgram(this, nbins)
+        class(ALL_t), intent(in) :: this
+        integer(c_int), dimension(3), intent(in) :: nbins !< Number of bins per dimension
+        call all_set_method_data_histogram_c(this%object, nbins)
+    end subroutine
     !> Set up the loadbalancer
     subroutine ALL_setup(this)
         class(ALL_t), intent(in) :: this
-        call ALL_setup_c(this%object)
+        call all_setup_c(this%object)
     end subroutine
     !> Run loadbalancer and calculate new vertices
     subroutine ALL_balance(this)
         class(ALL_t), intent(in) :: this
-        call ALL_balance_c(this%object)
+        call all_balance_c(this%object)
+    end subroutine
+    !> Retrieve currently set gamma value of balancer
+    subroutine ALL_get_gamma(this, gamma)
+        class(ALL_t), intent(in) :: this
+        real(c_double), intent(out) :: gamma
+        call all_get_gamma_c(this%object, gamma)
     end subroutine
     !> Retrieve number of vertices for current vertices
-    subroutine ALL_get_new_n_vertices(this, n)
+    subroutine ALL_get_number_of_vertices(this, n)
         class(ALL_t), intent(in) :: this
         integer(c_int), intent(out) :: n !< set to number of new vertices
-        call all_get_new_n_vertices_c(this%object, n)
+        call all_get_number_of_vertices_c(this%object, n)
     end subroutine
     !> Retrieve current vertices
-    subroutine ALL_get_result_vertices(this, vertices)
+    subroutine ALL_get_vertices(this, vertices)
         class(ALL_t), intent(in) :: this
         real(c_double), dimension(:,:), intent(out) :: vertices !< set to new vertices, must be exact size (dim,n)
         integer(c_int) :: n_verts
-        call this%get_new_n_vertices(n_verts)
+        call this%get_number_of_vertices(n_verts)
 #ifndef NDEBUG
         if(size(vertices,1) /= this%dim .or. size(vertices,2) < n_verts) then
-            write(error_unit,'(a)') "ALL_getResultVertices: vertices array not large enough!"
+            write(error_unit,'(a)') "ALL_get_vertices: vertices array not large enough!"
             stop
         endif
 #endif
-        call all_get_result_vertices_c(this%object, n_verts, vertices)
+        call all_get_vertices_c(this%object, n_verts, vertices)
     end subroutine
-    !> Same function as getResultVertices, but takes an allocatable array, and resizes automatically
-    subroutine ALL_get_result_vertices_alloc(this, vertices)
+    !> Same function as get_vertices, but takes an allocatable array, and resizes automatically
+    subroutine ALL_get_vertices_alloc(this, vertices)
         class(ALL_t), intent(in) :: this
         real(c_double), dimension(:,:), allocatable, intent(inout) :: vertices !< set to new vertices, may be reallocated to fit
         integer(c_int) :: n_verts
-        call this%get_new_n_vertices(n_verts)
+        call this%get_number_of_vertices(n_verts)
         if(allocated(vertices)) then
             if(size(vertices,1) /= this%dim .or. size(vertices,2) < n_verts) then
                 deallocate(vertices)
             endif
         endif
         if(.not.allocated(vertices)) allocate(vertices(this%dim, n_verts))
-        call all_get_result_vertices_c(this%object, n_verts, vertices)
+        call all_get_vertices_c(this%object, n_verts, vertices)
     end subroutine
     !> Retrieve vertices from before loadbalancing
     subroutine ALL_get_prev_vertices(this, vertices)
@@ -330,7 +459,7 @@ contains
 #ifndef NDEBUG
         ! TODO Check against number of vertices not only dimensionality
         if(size(vertices,1) /= this%dim) then
-            write(error_unit,'(a)') "ALL_getResultVertices: vertices array not large enough!"
+            write(error_unit,'(a)') "ALL_get_prev_vertices: vertices array not large enough!"
             stop
         endif
 #endif
@@ -340,20 +469,71 @@ contains
     subroutine ALL_get_dimension(this, dim)
         class(ALL_t), intent(in) :: this
         integer(c_int), intent(out) :: dim
-        call ALL_get_dimension_c(this%object,dim)
+        call all_get_dimension_c(this%object,dim)
     end subroutine
+    !> Retrieve length of work array
+    subroutine ALL_get_length_of_work(this, length)
+        class(ALL_t), intent(in) :: this
+        integer(c_int), intent(out) :: length
+        call all_get_length_of_work_c(this%object, length)
+    end subroutine
+    !> Retrieve first element of work array
+    subroutine ALL_get_work(this, work)
+        class(ALL_t), intent(in) :: this
+        real(c_double), intent(out) :: work
+        call all_get_work_c(this%object, work)
+    end subroutine
+    !> Retrieve work array, which must already be the correct size
+    subroutine ALL_get_work_array(this, work)
+        class(ALL_t), intent(in) :: this
+        real(c_double), dimension(:), intent(out) :: work
+        integer :: length
+        call ALL_get_length_of_work(this, length)
+        if(size(work) /= length) then
+            write(error_unit,'(a)') "ALL_get_work_array: work has wrong length!"
+            stop
+        endif
+        call all_get_work_array_c(this%object, work, size(work))
+    end subroutine
+    !> Retrieve number of neighbors (i.e. length of neighbors list)
+    subroutine ALL_get_number_of_neighbors(this, count)
+        class(ALL_t), intent(in) :: this
+        integer(c_int), intent(out) :: count
+        call all_get_number_of_vertices_c(this%object, count)
+    end subroutine
+    !> Retrieve list of neighboring ranks (must be correct size already)
+    subroutine ALL_get_neighbors(this, neighbors)
+        class(ALL_t), intent(in) :: this
+        integer(c_int), dimension(:), intent(out) :: neighbors
+        integer :: count
+        call ALL_get_number_of_neighbors(this, count)
+        if(size(neighbors) /= count) then
+            write(error_unit,'(a)') "ALL_get_neighbors: neighbors has wrong length!"
+            stop
+        endif
+        call all_get_neighbors_c(this%object, neighbors, size(neighbors))
+    end subroutine
+
+#ifdef ALL_VTK_OUTPUT
     !> Print VTK outlines (must be enabled in build step)
     subroutine ALL_print_vtk_outlines(this, step)
         class(ALL_t), intent(in) :: this
         integer(c_int), intent(in) :: step !< current step, used for filename
-        call ALL_print_vtk_outlines_c(this%object, step)
+        call all_print_vtk_outlines_c(this%object, step)
     end subroutine
+    !> Print VTK domain vertices (must be enabled in build step)
+    subroutine ALL_print_vtk_vertices(this, step)
+        class(ALL_t), intent(in) :: this
+        integer(c_int), intent(in) :: step !< current step, used for filename
+        call all_print_vtk_vertices_c(this%object, step)
+    end subroutine
+#endif
     function ALL_error() result(error)
         integer :: error
         error = all_errno_c()
     end function
     subroutine ALL_reset_error()
-        call ALL_reset_errno_c()
+        call all_reset_errno_c()
     end subroutine
     function ALL_error_description() result(desc)
         character(len=ALL_ERROR_LENGTH) :: desc
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index 6214431aa7ea5161adaea8aee5b680e9767d971e..283e2ceda34219ea46a2329e62030ce9215a6b0a 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -5,7 +5,7 @@ set(ALL_HEADER_FILES ${ALL_INCLUDE_DIR}/ALL.hpp
                      ${ALL_INCLUDE_DIR}/ALL_Histogram.hpp
                      ${ALL_INCLUDE_DIR}/ALL_Staggered.hpp
                      ${ALL_INCLUDE_DIR}/ALL_Tensor.hpp
-                     ${ALL_INCLUDE_DIR}/ALL_Unstructured.hpp
+                     ${ALL_INCLUDE_DIR}/ALL_ForceBased.hpp
                      ${ALL_INCLUDE_DIR}/ALL_Voronoi.hpp
                      ${ALL_INCLUDE_DIR}/ALL_Point.hpp
                      ${ALL_INCLUDE_DIR}/ALL_CustomExceptions.hpp)