Skip to content
Snippets Groups Projects
Commit f54a7b87 authored by v.gramlich1's avatar v.gramlich1
Browse files

Changes according to the threads, using histogram += hist and histogram /=...

Changes according to the threads. Using histogram += hist and histogram /= np.amax(histogram) leads to an error because of the wrong shape.
parent 21203700
Branches
No related tags found
1 merge request: !302 Draft: Resolve "Class-based Oversampling technique"
Pipeline #70735 passed
...@@ -71,32 +71,29 @@ class PreProcessing(RunEnvironment): ...@@ -71,32 +71,29 @@ class PreProcessing(RunEnvironment):
self.report_pre_processing() self.report_pre_processing()
self.prepare_competitors() self.prepare_competitors()
def apply_oversampling(self): def apply_oversampling(self, bins=10, rates_cap=20):
#if Abfrage for oversampling=True/False #if request for oversampling=True/False
bins = 10
rates_cap = 20
data = self.data_store.get('data_collection', 'train') data = self.data_store.get('data_collection', 'train')
histogram = np.array(bins) histogram = np.array(bins)
#get min and max of the whole data #get min and max of the whole data
min = 0 total_min = 0
max = 0 total_max = 0
for station in data: for station in data:
min = np.minimum(np.amin(station.get_Y(as_numpy=True)), min) total_min = np.minimum(np.amin(station.get_Y(as_numpy=True)), total_min)
max = np.maximum(np.amax(station.get_Y(as_numpy=True)), max) total_max = np.maximum(np.amax(station.get_Y(as_numpy=True)), total_max)
for station in data: for station in data:
# erstelle Histogramm mit numpy für jede Station # Create histogram for each station
hist, _ = np.histogram(station.get_Y(as_numpy=True), bins=bins, range=(min,max)) hist, _ = np.histogram(station.get_Y(as_numpy=True), bins=bins, range=(total_min,total_max))
#histograms.append(hist) # Add up histograms
histogram = histogram + hist histogram = histogram + hist
# Addiere alle Histogramme zusammen # Scale down to most frequent class=1
#histogram = histograms[0]+histograms[1]+histograms[2]+histograms[3]
#teile durch gesamtanzahl
histogram = 1/np.sum(histogram) * histogram
#mult mit 1/häufigste Klasse
histogram = 1/np.amax(histogram) * histogram histogram = 1/np.amax(histogram) * histogram
#Oversampling 1/Kl # Get Oversampling rates (with and without cap)
oversampling_rates = 1 / histogram oversampling_rates = 1 / histogram
oversampling_rates_capped = np.minimum(oversampling_rates, rates_cap) oversampling_rates_capped = np.minimum(oversampling_rates, rates_cap)
# Add to datastore
self.data_store.set('oversampling_rates', oversampling_rates, 'training')
self.data_store.set('oversampling_rates_capped', oversampling_rates_capped, 'training')
def report_pre_processing(self): def report_pre_processing(self):
"""Log some metrics on data and create latex report.""" """Log some metrics on data and create latex report."""
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment