diff --git a/mlair/run_modules/pre_processing.py b/mlair/run_modules/pre_processing.py
index 9d44ce0b0e8d7b0bac9c188c697a5e65ab67df4c..6f3c1ceff7292ff9096ea3edba652ef19b8aa771 100644
--- a/mlair/run_modules/pre_processing.py
+++ b/mlair/run_modules/pre_processing.py
@@ -8,6 +8,8 @@ import os
 import traceback
 from typing import Tuple
 import multiprocessing
+
+import numpy as np
 import requests
 import psutil
 
@@ -65,9 +67,37 @@ class PreProcessing(RunEnvironment):
             raise ValueError("Couldn't find any valid data according to given parameters. Abort experiment run.")
         self.data_store.set("stations", valid_stations)
         self.split_train_val_test()
+        self.apply_oversampling()
         self.report_pre_processing()
         self.prepare_competitors()
 
+    def apply_oversampling(self, bins=10, rates_cap=20):
+        """
+        Derive capped oversampling rates per target-value bin from the train subset.
+
+        All train targets are binned into `bins` equal-width bins between their global min and max. Each bin's rate
+        is the most populated bin's count divided by its own count, limited to `rates_cap` (also used for empty bins).
+
+        :param bins: number of equal-width histogram bins
+        :param rates_cap: upper limit for a single oversampling rate
+        :return: array with one rate per bin, or None if the train collection holds no data
+        """
+        # TODO: only run this step if oversampling is enabled for the experiment
+        data = self.data_store.get('data_collection', 'train')
+        targets = [station.get_Y(as_numpy=True) for station in data]
+        if len(targets) == 0:
+            return None
+        # take the range from the data itself; seeding it with 0 distorts strictly positive or negative targets
+        value_range = (min(np.amin(y) for y in targets), max(np.amax(y) for y in targets))
+        # start from zeros: np.array(bins) is the scalar `bins` and would add a constant offset to every bin
+        histogram = np.zeros(bins)
+        for y in targets:
+            histogram += np.histogram(y, bins=bins, range=value_range)[0]
+        # dividing by the total count first is not needed, it cancels out when scaling by the most frequent bin
+        with np.errstate(divide="ignore"):  # empty bins yield inf and are limited by the cap below
+            oversampling_rates = np.amax(histogram) / histogram
+        return np.minimum(oversampling_rates, rates_cap)
+
     def report_pre_processing(self):
         """Log some metrics on data and create latex report."""
         logging.debug(20 * '##')