Skip to content
Snippets Groups Projects
Commit 45199b6c authored by v.gramlich1's avatar v.gramlich1
Browse files

apply_oversampling calculates the desired oversampling_rates

parent 74e6e53c
No related branches found
No related tags found
1 merge request: !302 Draft: Resolve "Class-based Oversampling technique"
Pipeline #70534 passed
......@@ -8,6 +8,8 @@ import os
import traceback
from typing import Tuple
import multiprocessing
import numpy as np
import requests
import psutil
......@@ -65,9 +67,37 @@ class PreProcessing(RunEnvironment):
raise ValueError("Couldn't find any valid data according to given parameters. Abort experiment run.")
self.data_store.set("stations", valid_stations)
self.split_train_val_test()
self.apply_oversampling()
self.report_pre_processing()
self.prepare_competitors()
def apply_oversampling(self, bins=10, rates_cap=20):
    """
    Calculate capped, class-based oversampling rates from the training targets.

    The targets of all entries in the train data collection are sorted into `bins` equally wide classes that
    span the global minimum and maximum of the targets. Each class is assigned the rate
    ``count(most frequent class) / count(class)``, limited to `rates_cap`. Classes without any sample are
    assigned `rates_cap`.

    :param bins: number of equally wide classes (integer)
    :param rates_cap: upper limit for the oversampling rate of a single class
    :return: numpy array of length `bins` holding one oversampling rate per class; all ones (no oversampling)
        if the collection provides no target values at all
    """
    # TODO: only run this step if oversampling is switched on (no such flag is available here yet).
    data = self.data_store.get('data_collection', 'train')

    # First pass: global range of the targets. Start from +/-inf instead of 0 so that the range is not
    # artificially stretched to include 0 when all targets are strictly positive or strictly negative.
    lower, upper = np.inf, -np.inf
    for station in data:
        targets = station.get_Y(as_numpy=True)
        if np.size(targets) == 0:
            continue  # nothing to contribute, and np.amin / np.amax raise on empty input
        lower = np.minimum(np.amin(targets), lower)
        upper = np.maximum(np.amax(targets), upper)
    if lower > upper:
        # No target value found at all: there is nothing to balance.
        return np.ones(bins)

    # Second pass: sum up the per-station histograms on the common bin edges. Two passes are used so that
    # only one station's targets are referenced by this method at a time.
    counts = np.zeros(bins, dtype=float)
    for station in data:
        hist, _ = np.histogram(station.get_Y(as_numpy=True), bins=bins, range=(lower, upper))
        counts += hist

    # Rate = count of most frequent class / count of class. Dividing by the total sample count first (as a
    # relative frequency) cancels out, so it is omitted. Empty classes are skipped by the division and keep
    # the pre-filled cap, which avoids a division by zero.
    oversampling_rates = np.full(bins, float(rates_cap))
    np.divide(counts.max(), counts, out=oversampling_rates, where=counts > 0)
    return np.minimum(oversampling_rates, rates_cap)
def report_pre_processing(self):
"""Log some metrics on data and create latex report."""
logging.debug(20 * '##')
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment