From afc7d54480680f6b28f2507bbd17b8f41bcf28ae Mon Sep 17 00:00:00 2001
From: Andreas Herten <a.herten@fz-juelich.de>
Date: Sun, 17 Nov 2019 15:38:03 -0700
Subject: [PATCH] Add Non-Notebook scripts for visualization

---
 .../Handson/.master/graphing.py               | 144 ++++++++++++------
 1 file changed, 99 insertions(+), 45 deletions(-)

diff --git a/2-Performance_Counters/Handson/.master/graphing.py b/2-Performance_Counters/Handson/.master/graphing.py
index 6ff367c..66672ef 100644
--- a/2-Performance_Counters/Handson/.master/graphing.py
+++ b/2-Performance_Counters/Handson/.master/graphing.py
@@ -1,52 +1,110 @@
+import matplotlib as mpl
+mpl.use('Agg')
 import matplotlib.pyplot as plt
-import pandas as pd
+import numpy as np
 import seaborn as sns
+import pandas as pd
+import common
 sns.set()
 plt.rcParams['figure.figsize'] = [14, 6]
 
-import common
+def linear_function(x, a, b):  # Straight-line model a*x + b; handed to common.print_and_return_fit as the fit function
+    return a*x+b
 
 def task1(input="poisson2d.ins_cyc.bin.csv"):
-	df = pd.read_csv(input, skiprows=range(2, 50000, 2))  # Read in the CSV file from the bench run; parse with Pandas
-	common.normalize(df, "PM_INST_CMPL (min)", "Instructions / Loop Iteration")  # Normalize to each iteration
-	common.normalize(df, "PM_RUN_CYC (min)", "Cycles / Loop Iteration")
+	df = pd.read_csv(input, skiprows=range(2, 50000, 2))  # Read the CSV file named by `input` (from the bench run); parse with Pandas
+	df["Grid Points"] = df["nx"] * df["ny"]  # Add a new column of the number of grid points (the product of nx and ny)
+	fit_parameters, fit_covariance = common.print_and_return_fit(  # fit_parameters is looked up by counter name below
+	    ["PM_RUN_CYC (min)", "PM_INST_CMPL (min)"],
+	    df.set_index("Grid Points"),
+	    linear_function,
+	    format_uncertainty=".4f"  # NOTE(review): task2a/task2b pass format_value=".4f" here; confirm which keyword is intended
+	)
 	fig, (ax1, ax2) = plt.subplots(nrows=2, sharex=True)
-	df.set_index("nx")["Cycles / Loop Iteration"].plot(ax=ax1, legend=True);
-	df.set_index("nx")["Instructions / Loop Iteration"].plot(ax=ax2, legend=True);
+	for ax, pmu_counter in zip([ax1, ax2], ["PM_RUN_CYC (min)", "PM_INST_CMPL (min)"]):  # One subplot per counter
+	    df.set_index("Grid Points")[pmu_counter].plot(ax=ax, legend=True)
+	    ax.plot(  # Overlay the fitted straight line as a dashed curve
+	        df["Grid Points"],
+	        linear_function(df["Grid Points"], *fit_parameters[pmu_counter]),
+	        linestyle="--",
+	        label="Fit: {:.2f} * x + {:.2f}".format(*fit_parameters[pmu_counter])
+	    )
+	    ax.legend()  # Called after the fit line is added so its label is listed too
 	fig.savefig("plot-task1.pdf")
 
 def task2a(input="poisson2d.ld_st.bin.csv"):
-	df_ldst = pd.read_csv(input, skiprows=range(2, 50000, 2))
-	common.normalize(df_ldst, "PM_LD_CMPL (min)", "Loads / Loop Iteration")
-	common.normalize(df_ldst, "PM_ST_CMPL (min)", "Stores / Loop Iteration")
+	df_ldst = pd.read_csv(input, skiprows=range(2, 50000, 2))  # Read the CSV file named by `input`; parse with Pandas
+	df_ldst["Grid Points"] = df_ldst["nx"] * df_ldst["ny"]  # Number of grid points (the product of nx and ny)
+	fit_parameters, fit_covariance = common.print_and_return_fit(  # fit_parameters is looked up by counter name below
+	    ["PM_LD_CMPL (min)", "PM_ST_CMPL (min)"],
+	    df_ldst.set_index("Grid Points"),
+	    linear_function,
+	    format_value=".4f"
+	)
 	fig, (ax1, ax2) = plt.subplots(nrows=2, sharex=True)
-	df_ldst.set_index("nx")["Loads / Loop Iteration"].plot(ax=ax1, legend=True);
-	df_ldst.set_index("nx")["Stores / Loop Iteration"].plot(ax=ax2, legend=True);
+	for ax, pmu_counter in zip([ax1, ax2], ["PM_LD_CMPL (min)", "PM_ST_CMPL (min)"]):  # One subplot per counter
+	    df_ldst.set_index("Grid Points")[pmu_counter].plot(ax=ax, legend=True)
+	    ax.plot(  # Overlay the fitted straight line as a dashed curve
+	        df_ldst["Grid Points"],
+	        linear_function(df_ldst["Grid Points"], *fit_parameters[pmu_counter]),
+	        linestyle="--",
+	        label="Fit: {:.2f} * x + {:.2f}".format(*fit_parameters[pmu_counter])
+	    )
+	    ax.legend()  # Called after the fit line is added so its label is listed too
 	fig.savefig("plot-task2a.pdf")
 
 
-def task2b(input1="poisson2d.vld.bin.csv", input2="poisson2d.vst.bin.csv", input3="poisson2d.ld_st.bin.csv", bytes=False):
+def task2b(input1="poisson2d.vld.bin.csv", input2="poisson2d.vst.bin.csv", input3="poisson2d.ld_st.bin.csv", bytes=False, just_return=False):  # just_return only matters when bytes is True: return the byte DataFrame instead of fitting/plotting it
 	df_vld = pd.read_csv(input1, skiprows=range(2, 50000, 2))
 	df_vst = pd.read_csv(input2, skiprows=range(2, 50000, 2))
 	df_vldvst = pd.concat([df_vld.set_index("nx"), df_vst.set_index("nx")[['PM_VECTOR_ST_CMPL (total)', 'PM_VECTOR_ST_CMPL (min)', ' PM_VECTOR_ST_CMPL (max)']]], axis=1).reset_index()
-	common.normalize(df_vldvst, "PM_VECTOR_LD_CMPL (min)", "Vector Loads / Loop Iteration")
-	common.normalize(df_vldvst, "PM_VECTOR_ST_CMPL (min)", "Vector Stores / Loop Iteration")
+	df_vldvst["Grid Points"] = df_vldvst["nx"] * df_vldvst["ny"]  # Number of grid points (the product of nx and ny)
+	fit_parameters, fit_covariance = common.print_and_return_fit(  # Runs for every call, including bytes=True / just_return=True
+	    ["PM_VECTOR_LD_CMPL (min)", "PM_VECTOR_ST_CMPL (min)"], 
+	    df_vldvst.set_index("Grid Points"), 
+	    linear_function,
+	    format_value=".4f",
+	)
 	if bytes is False:
 		fig, (ax1, ax2) = plt.subplots(nrows=2, sharex=True)
-		df_vldvst.set_index("nx")["Vector Loads / Loop Iteration"].plot(ax=ax1, legend=True);
-		df_vldvst.set_index("nx")["Vector Stores / Loop Iteration"].plot(ax=ax2, legend=True);
+		for ax, pmu_counter in zip([ax1, ax2], ["PM_VECTOR_LD_CMPL (min)", "PM_VECTOR_ST_CMPL (min)"]):  # One subplot per counter
+		    df_vldvst.set_index("Grid Points")[pmu_counter].plot(ax=ax, legend=True);
+		    ax.plot(  # Overlay the fitted straight line as a dashed curve
+		        df_vldvst["Grid Points"], 
+		        linear_function(df_vldvst["Grid Points"], *fit_parameters[pmu_counter]), 
+		        linestyle="--", 
+		        label="Fit: {:.2f} * x + {:.2f}".format(*fit_parameters[pmu_counter])
+		    )
+		    ax.legend();
 		fig.savefig("plot-task2b.pdf")
 	else:
-		df_ldst = pd.read_csv(input3, skiprows=range(2, 50000, 2))
-		common.normalize(df_ldst, "PM_LD_CMPL (min)", "Loads / Loop Iteration")
-		common.normalize(df_ldst, "PM_ST_CMPL (min)", "Stores / Loop Iteration")
 		df_byte = pd.DataFrame()
-		df_byte["Loads / Loop Iteration"] = (df_vldvst.set_index("nx")["Vector Loads / Loop Iteration"] + df_ldst.set_index("nx")["Loads / Loop Iteration"])*8
-		df_byte["Stores / Loop Iteration"] = (df_vldvst.set_index("nx")["Vector Stores / Loop Iteration"] + df_ldst.set_index("nx")["Stores / Loop Iteration"])*8
-		fig, ax = plt.subplots()
-		ax = df_byte.plot(ax=ax)
-		ax.set_ylabel("Bytes / Loop Iteration");
-		fig.savefig("plot-task2b-2.pdf")
+		df_ldst = pd.read_csv(input3, skiprows=range(2, 50000, 2))
+		df_ldst["Grid Points"] = df_ldst["nx"] * df_ldst["ny"]  # Same grid-point index as df_vldvst so the two Series align when added
+		df_byte["Loads"]  = (df_vldvst.set_index("Grid Points")["PM_VECTOR_LD_CMPL (min)"] + df_ldst.set_index("Grid Points")["PM_LD_CMPL (min)"])*8  # (vector + scalar load counts) * 8, plotted below as "Bytes"; NOTE(review): confirm 8 is the intended bytes-per-access factor
+		df_byte["Stores"] = (df_vldvst.set_index("Grid Points")["PM_VECTOR_ST_CMPL (min)"] + df_ldst.set_index("Grid Points")["PM_ST_CMPL (min)"])*8  # Same computation for stores
+		if not just_return:  # Default path: fit and plot the byte counts
+			_fit, _cov = common.print_and_return_fit(
+			    ["Loads", "Stores"], 
+			    df_byte, 
+			    linear_function
+			)
+			fit_parameters = {**fit_parameters, **_fit}  # Merge the byte fits into the vector-counter fit results
+			fit_covariance = {**fit_covariance, **_cov}
+			fig, ax = plt.subplots()
+			for pmu_counter in ["Loads", "Stores"]:  # Both series share a single axis
+			    df_byte[pmu_counter].plot(ax=ax, legend=True);
+			    ax.plot(  # Overlay the fitted straight line as a dashed curve
+			        df_byte.index, 
+			        linear_function(df_byte.index, *fit_parameters[pmu_counter]), 
+			        linestyle="--", 
+			        label="Fit: {:.2f} * x + {:.2f}".format(*fit_parameters[pmu_counter])
+			    )
+			ax.legend();
+			ax.set_ylabel("Bytes");
+			fig.savefig("plot-task2b-2.pdf")
+		else:
+			return df_byte  # Caller only wants the data (task4 uses this for arithmetic intensity)
 
 def task2c(input1="poisson2d.vld.bin.csv", input2="poisson2d.vst.bin.csv", input3="poisson2d.ld_st.bin.csv", input4="poisson2d.ins_cyc.bin.csv"):
 	df = pd.read_csv(input4, skiprows=range(2, 50000, 2))
@@ -77,29 +135,25 @@ def task4(input1="poisson2d.vld.bin.csv", input2="poisson2d.vst.bin.csv", input3
 	df_sflop = pd.read_csv(input5, skiprows=range(2, 50000, 2))
 	df_vflop = pd.read_csv(input6, skiprows=range(2, 50000, 2))
 	df_flop = pd.concat([df_sflop.set_index("nx"), df_vflop.set_index("nx")[['PM_VECTOR_FLOP_CMPL (total)', 'PM_VECTOR_FLOP_CMPL (min)', ' PM_VECTOR_FLOP_CMPL (max)']]], axis=1).reset_index()
-	common.normalize(df_flop, "PM_SCALAR_FLOP_CMPL (min)", "Scalar FlOps / Loop Iteration")
-	common.normalize(df_flop, "PM_VECTOR_FLOP_CMPL (min)", "Vector Instructions / Loop Iteration")
-	df_flop["Vector FlOps / Loop Iteration"] = df_flop["Vector Instructions / Loop Iteration"] * 2
+
+	df_flop["Grid Points"] = df_flop["nx"] * df_flop["ny"]
+	df_flop["Vector FlOps (min)"] = df_flop["PM_VECTOR_FLOP_CMPL (min)"] * 2
+	df_flop["Scalar FlOps (min)"] = df_flop["PM_SCALAR_FLOP_CMPL (min)"]
+	fit_parameters, fit_covariance = common.print_and_return_fit(
+	    ["Scalar FlOps (min)", "Vector FlOps (min)"], 
+	    df_flop.set_index("Grid Points"), 
+	    linear_function
+	)
 	if ai is False:
 		fig, ax = plt.subplots()
-		df_flop.set_index("nx")[["Scalar FlOps / Loop Iteration", "Vector FlOps / Loop Iteration"]].plot(ax=ax);
+		df_flop.set_index("Grid Points")[["Scalar FlOps (min)", "Vector FlOps (min)"]].plot(ax=ax);
 		fig.savefig("plot-task4.pdf")
 	else:
-		df_vld = pd.read_csv(input1, skiprows=range(2, 50000, 2))
-		df_vst = pd.read_csv(input2, skiprows=range(2, 50000, 2))
-		df_vldvst = pd.concat([df_vld.set_index("nx"), df_vst.set_index("nx")[['PM_VECTOR_ST_CMPL (total)', 'PM_VECTOR_ST_CMPL (min)', ' PM_VECTOR_ST_CMPL (max)']]], axis=1).reset_index()
-		common.normalize(df_vldvst, "PM_VECTOR_LD_CMPL (min)", "Vector Loads / Loop Iteration")
-		common.normalize(df_vldvst, "PM_VECTOR_ST_CMPL (min)", "Vector Stores / Loop Iteration")
-		df_ldst = pd.read_csv(input3, skiprows=range(2, 50000, 2))
-		common.normalize(df_ldst, "PM_LD_CMPL (min)", "Loads / Loop Iteration")
-		common.normalize(df_ldst, "PM_ST_CMPL (min)", "Stores / Loop Iteration")
-		df_byte = pd.DataFrame()
-		df_byte["Loads / Loop Iteration"] = (df_vldvst.set_index("nx")["Vector Loads / Loop Iteration"] + df_ldst.set_index("nx")["Loads / Loop Iteration"])*8
-		df_byte["Stores / Loop Iteration"] = (df_vldvst.set_index("nx")["Vector Stores / Loop Iteration"] + df_ldst.set_index("nx")["Stores / Loop Iteration"])*8
-		I_flop_scalar = df_flop.set_index("nx")["Scalar FlOps / Loop Iteration"]
-		I_flop_vector = df_flop.set_index("nx")["Vector FlOps / Loop Iteration"]
-		I_mem_load    = df_byte["Loads / Loop Iteration"]
-		I_mem_store   = df_byte["Stores / Loop Iteration"]
+		df_byte = task2b(bytes=True, just_return=True)
+		I_flop_scalar = df_flop.set_index("Grid Points")["Scalar FlOps (min)"]
+		I_flop_vector = df_flop.set_index("Grid Points")["Vector FlOps (min)"]
+		I_mem_load    = df_byte["Loads"]
+		I_mem_store   = df_byte["Stores"]
 		df_ai = pd.DataFrame()
 		df_ai["Arithmetic Intensity"] = (I_flop_scalar + I_flop_vector) / (I_mem_load + I_mem_store)
 		fig, ax = plt.subplots()
-- 
GitLab