From 4b92cac4f9d1897d440642185b458e3466ecfaab Mon Sep 17 00:00:00 2001 From: Dirk Pleiter <d.pleiter@fz-juelich.de> Date: Fri, 31 Jul 2020 22:34:33 +0200 Subject: [PATCH] Fixed old bug in time measurement; more verbose output when using BLIS --- src/mygemm/Makefile | 2 +- src/mygemm/mydgemm.c | 208 ++---------------------------------------- src/mygemm/mysgemm.c | 209 ++----------------------------------------- src/mygemm/myzgemm.c | 209 ++----------------------------------------- 4 files changed, 20 insertions(+), 608 deletions(-) diff --git a/src/mygemm/Makefile b/src/mygemm/Makefile index 946cbbe..85811df 100644 --- a/src/mygemm/Makefile +++ b/src/mygemm/Makefile @@ -41,7 +41,7 @@ all: $(TARGETS) %.s: $(SRCDIR)/%.c $(CC) $(CFLAGS) -S $< -o $@ -%.x: $(SRCDIR)/%.c +%.x: $(SRCDIR)/%.c $(SRCDIR)/myxgemm.h $(CC) $(CFLAGS) $< -o $@ $(LIB) clean: diff --git a/src/mygemm/mydgemm.c b/src/mygemm/mydgemm.c index 7e3081d..6a66547 100644 --- a/src/mygemm/mydgemm.c +++ b/src/mygemm/mydgemm.c @@ -2,208 +2,12 @@ // DGEMM benchmark //================================================================================================== -#include <stdio.h> -#include <stdlib.h> -#ifdef _OPENMP -#include <omp.h> -#endif +typedef double my_t; -#ifdef USEGETTIME -#include <time.h> -#endif +#define MYBLIS_T BLIS_DOUBLE +#define MYXGEMM cblas_dgemm -#ifdef USEPAPI -#include "papi.h" -#endif +#define xstr(a) str(a) +#define str(a) #a -#ifdef USEOPENBLAS -#include "cblas.h" -#endif - -#ifdef USEBLIS -#define BLIS_ENABLE_CBLAS 1 -#include "blis/blis.h" -#endif - -#ifdef USEPAPI -#define NEVENT 4 -int event[NEVENT] = { PAPI_REF_CYC, PAPI_L1_DCM, PAPI_L2_DCM, PAPI_TOT_INS }; -//int event[NEVENT] = { PAPI_TOT_CYC, PAPI_FP_INS, PAPI_L1_DCM }; -#else -#define NEVENT 0 -#endif - -#define NREPMAX 10000 - -double *a; -double *b; -double *c; -double alpha; -double beta; - -int main(int argc, char* argv[]) -{ - int i, j, k, l; - int n, nrep; - long long cnt[NEVENT]; - long long stat[NEVENT+1][NREPMAX]; - long long sum[NEVENT+1]; - long long min[NEVENT+1]; - long long max[NEVENT+1]; -#ifdef USEPAPI - char event_s[PAPI_MAX_STR_LEN]; -#endif -#ifdef USEGETTIME - struct timespec t0, t1; -#endif - - if (argc != 3) - { - fprintf(stderr, "Usage: %s <n> <nrep>\n", argv[0]); - return 1; - } - - n = atoi(argv[1]); - nrep = atoi(argv[2]); - - if (nrep > NREPMAX) - { - fprintf(stderr, "nrep exceeds NREPMAX (%d>%d)\n", nrep, NREPMAX); - return 1; - } - - a = (double *) malloc(n * n * sizeof(double)); - b = (double *) malloc(n * n * sizeof(double)); - c = (double *) malloc(n * n * sizeof(double)); - - if (a == NULL || b == NULL || c == NULL) - { - fprintf(stderr, "malloc() failed\n"); - return 1; - } - -#ifdef USEPAPI - if (PAPI_start_counters(event, NEVENT) != PAPI_OK) - { - fprintf(stderr, "PAPI_start_counters() failed\n"); - //return 1; - } -#endif - -#ifdef USEBLIS - bli_init(); -#endif - - //------------------------------------------------------------------------------------------------ - // Main loop - //------------------------------------------------------------------------------------------------ - - for (k = 0; k < nrep; k++) - { - for (i = 0; i < n; i++) - for (j = 0; j < n; j++) - { - a[i*n+j] = 1.0 + 0.01 * i + 0.0001 * j; - b[i*n+j] = 2.0 + 0.01 * i + 0.0001 * j; - c[i*n+j] = 3.0 + 0.01 * i + 0.0001 * j; - } - alpha = 0.5; - beta = 0.6; - -#ifdef USEGETTIME - clock_gettime(CLOCK_PROCESS_CPUTIME_ID , &t0); -#endif -#ifdef USEPAPI - PAPI_read_counters(cnt, NEVENT); -#endif - - cblas_dgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans, n, n, n, alpha, a, n, b, n, beta ,c, n); - -#ifdef USEPAPI - PAPI_read_counters(cnt, NEVENT); -#endif -#ifdef USEGETTIME - clock_gettime(CLOCK_PROCESS_CPUTIME_ID , &t1); -#endif - -#ifdef USEPAPI - for (l = 0; l < NEVENT; l++) - stat[l][k] = cnt[l]; -#endif -#ifdef USEGETTIME - stat[NEVENT][k] = (long long) ((t1.tv_sec - t0.tv_sec) * 1e9 + (t1.tv_nsec - t0.tv_nsec)); -#endif - } - - //------------------------------------------------------------------------------------------------ - // Test output - //------------------------------------------------------------------------------------------------ -#if 0 - for (k = 0; k < n; k++) - { - for (l = 0; l < n; l++) - printf(" (%.5f,%.5f)", creal(a[k*n+l]), cimag(a[k*n+l])); - printf("\t"); - for (l = 0; l < n; l++) - printf(" (%.5f,%.5f)", creal(b[k*n+l]), cimag(b[k*n+l])); - printf("\t"); - for (l = 0; l < n; l++) - printf(" (%.5f,%.5f)", creal(c[k*n+l]), cimag(c[k*n+l])); - printf("\n"); - } -#endif - - //------------------------------------------------------------------------------------------------ - // Generate statistics - //------------------------------------------------------------------------------------------------ - - for (l = 0; l <= NEVENT; l++) - { - sum[l] = 0; - min[l] = -1; - max[l] = 0; - for (k = 1; k < nrep; k++) - { - sum[l] += stat[l][k]; - min[l] = (min[l] < 0 || stat[l][k] < min[l]) ? stat[l][k] : min[l]; - max[l] = (stat[l][k] > max[l]) ? stat[l][k] : max[l]; - } - } - - //------------------------------------------------------------------------------------------------ - // Print results - //------------------------------------------------------------------------------------------------ - -#if defined(USEOPENBLAS) - printf("# OpenBLAS\n"); - printf("# nthr=%s\n", getenv("OPENBLAS_NUM_THREADS")); -#elif defined (USEBLIS) - printf("# BLIS\n"); - printf("# nthr=%s\n", getenv("BLIS_NUM_THREADS")); -#endif - printf("# n=%d\n", n); - printf("# nrep=%d\n", nrep); - printf("#"); -#ifdef USEPAPI - for (l = 0; l < NEVENT; l++) - { - PAPI_event_code_to_name(event[l], event_s); - printf(" %s", event_s); - } -#endif -#ifdef USEGETTIME - printf(" time/ns\n"); -#endif - -#if defined(USEPAPI) || defined(USEGETTIME) - printf("%-15s: ", "cblas_dgemm"); - for (l = 0; l <= NEVENT; l++) - { - printf("\t%.3e %.3e %.3e", (double) sum[l] / (nrep - 1.), - (double) min[l], (double) max[l]); - } - printf("\n"); -#endif - - return 0; -} +#include "myxgemm.h" diff --git a/src/mygemm/mysgemm.c b/src/mygemm/mysgemm.c index d7cc22e..18ff7d2 100644 --- a/src/mygemm/mysgemm.c +++ b/src/mygemm/mysgemm.c @@ -2,209 +2,12 @@ // SGEMM benchmark //================================================================================================== -#include <stdio.h> -#include <stdlib.h> -#ifdef _OPENMP -#include <omp.h> -#endif +typedef float my_t; -#ifdef USEGETTIME -#include <time.h> -#endif +#define MYBLIS_T BLIS_FLOAT +#define MYXGEMM cblas_sgemm -#ifdef USEPAPI -#include "papi.h" -#endif - -#ifdef USEOPENBLAS -#include "cblas.h" -#endif - -#ifdef USEBLIS -#define BLIS_ENABLE_CBLAS 1 -#include "blis/blis.h" -#endif - -#ifdef USEPAPI -#define NEVENT 4 -int event[NEVENT] = { PAPI_REF_CYC, PAPI_L1_DCM, PAPI_L2_DCM, PAPI_TOT_INS }; -//int event[NEVENT] = { PAPI_TOT_CYC, PAPI_FP_INS, PAPI_L1_DCM }; -#else -#define NEVENT 0 -#endif - -#define NREPMAX 10000 - -float *a; -float *b; -float *c; -float alpha; -float beta; - -int main(int argc, char* argv[]) -{ - int i, j, k, l; - int n, nrep; - long long cnt[NEVENT]; - long long stat[NEVENT+1][NREPMAX]; - long long sum[NEVENT+1]; - long long min[NEVENT+1]; - long long max[NEVENT+1]; -#ifdef USEPAPI - char event_s[PAPI_MAX_STR_LEN]; -#endif -#ifdef USEGETTIME - struct timespec t0, t1; -#endif - - if (argc != 3) - { - fprintf(stderr, "Usage: %s <n> <nrep>\n", argv[0]); - return 1; - } - - n = atoi(argv[1]); - nrep = atoi(argv[2]); - - if (nrep > NREPMAX) - { - fprintf(stderr, "nrep exceeds NREPMAX (%d>%d)\n", nrep, NREPMAX); - return 1; - } - - a = (float *) malloc(n * n * sizeof(float)); - b = (float *) malloc(n * n * sizeof(float)); - c = (float *) malloc(n * n * sizeof(float)); - - if (a == NULL || b == NULL || c == NULL) - { - fprintf(stderr, "malloc() failed\n"); - return 1; - } - -#ifdef USEPAPI - if (PAPI_start_counters(event, NEVENT) != PAPI_OK) - { - fprintf(stderr, "PAPI_start_counters() failed\n"); - //return 1; - } -#endif - -#ifdef USEBLIS - bli_init(); -#endif - - //------------------------------------------------------------------------------------------------ - // Main loop - //------------------------------------------------------------------------------------------------ - - for (k = 0; k < nrep; k++) - { - for (i = 0; i < n; i++) - for (j = 0; j < n; j++) - { - a[i*n+j] = 1.0 + 0.01 * i + 0.0001 * j; - b[i*n+j] = 2.0 + 0.01 * i + 0.0001 * j; - c[i*n+j] = 3.0 + 0.01 * i + 0.0001 * j; - } - alpha = 0.5; - beta = 0.6; - -#ifdef USEGETTIME - clock_gettime(CLOCK_PROCESS_CPUTIME_ID , &t0); -#endif -#ifdef USEPAPI - PAPI_read_counters(cnt, NEVENT); -#endif - - cblas_sgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans, n, n, n, alpha, a, n, b, n, beta ,c, n); - -#ifdef USEPAPI - PAPI_read_counters(cnt, NEVENT); -#endif -#ifdef USEGETTIME - clock_gettime(CLOCK_PROCESS_CPUTIME_ID , &t1); -#endif - -#ifdef USEPAPI - for (l = 0; l < NEVENT; l++) - stat[l][k] = cnt[l]; -#endif -#ifdef USEGETTIME - stat[NEVENT][k] = (long long) ((t1.tv_sec - t0.tv_sec) * 1e9 + (t1.tv_nsec - t0.tv_nsec)); -#endif - } - - //------------------------------------------------------------------------------------------------ - // Test output - //------------------------------------------------------------------------------------------------ -#if 0 - for (k = 0; k < n; k++) - { - for (l = 0; l < n; l++) - printf(" (%.5f,%.5f)", creal(a[k*n+l]), cimag(a[k*n+l])); - printf("\t"); - for (l = 0; l < n; l++) - printf(" (%.5f,%.5f)", creal(b[k*n+l]), cimag(b[k*n+l])); - printf("\t"); - for (l = 0; l < n; l++) - printf(" (%.5f,%.5f)", creal(c[k*n+l]), cimag(c[k*n+l])); - printf("\n"); - } -#endif - - //------------------------------------------------------------------------------------------------ - // Generate statistics - //------------------------------------------------------------------------------------------------ - - for (l = 0; l <= NEVENT; l++) - { - sum[l] = 0; - min[l] = -1; - max[l] = 0; - for (k = 1; k < nrep; k++) - { - sum[l] += stat[l][k]; - min[l] = (min[l] < 0 || stat[l][k] < min[l]) ? stat[l][k] : min[l]; - max[l] = (stat[l][k] > max[l]) ? stat[l][k] : max[l]; - } - } - - //------------------------------------------------------------------------------------------------ - // Print results - //------------------------------------------------------------------------------------------------ - -#if defined(USEOPENBLAS) - printf("# OpenBLAS\n"); - printf("# nthr=%s\n", getenv("OPENBLAS_NUM_THREADS")); -#elif defined (USEBLIS) - printf("# BLIS\n"); - printf("# nthr=%s\n", getenv("BLIS_NUM_THREADS")); -#endif - printf("# n=%d\n", n); - printf("# nrep=%d\n", nrep); - printf("#"); -#ifdef USEPAPI - for (l = 0; l < NEVENT; l++) - { - PAPI_event_code_to_name(event[l], event_s); - printf(" %s", event_s); - } -#endif -#ifdef USEGETTIME - printf(" time/ns\n"); -#endif - -#if defined(USEPAPI) || defined(USEGETTIME) - printf("%-15s: ", "cblas_sgemm"); - for (l = 0; l <= NEVENT; l++) - { - printf("\t%.3e %.3e %.3e", (float) sum[l] / (nrep - 1.), - (float) min[l], (float) max[l]); - } - printf("\n"); -#endif - - return 0; -} +#define xstr(a) str(a) +#define str(a) #a +#include "myxgemm.h" diff --git a/src/mygemm/myzgemm.c b/src/mygemm/myzgemm.c index 145b047..aea4ec6 100644 --- a/src/mygemm/myzgemm.c +++ b/src/mygemm/myzgemm.c @@ -2,211 +2,16 @@ // ZGEMM benchmark //================================================================================================== -#include <stdio.h> -#include <stdlib.h> #include <complex.h> -#ifdef _OPENMP -#include <omp.h> -#endif -#ifdef USEGETTIME -#include <time.h> -#endif +typedef double complex my_t; -#ifdef USEPAPI -#include "papi.h" -#endif +#define CMPLX -#ifdef USEOPENBLAS -#include "cblas.h" -#endif +#define MYBLIS_T BLIS_DCOMPLEX +#define MYXGEMM cblas_zgemm -#ifdef USEBLIS -#define BLIS_ENABLE_CBLAS 1 -#include "blis/blis.h" -#endif +#define xstr(a) str(a) +#define str(a) #a -#ifdef USEPAPI -#define NEVENT 4 -int event[NEVENT] = { PAPI_REF_CYC, PAPI_L1_DCM, PAPI_L2_DCM, PAPI_TOT_INS }; -//int event[NEVENT] = { PAPI_TOT_CYC, PAPI_FP_INS, PAPI_L1_DCM }; -#else -#define NEVENT 0 -#endif - -#define NREPMAX 10000 - -typedef double complex complex_t; - -complex_t *a; -complex_t *b; -complex_t *c; -complex_t alpha; -complex_t beta; - -int main(int argc, char* argv[]) -{ - int i, j, k, l; - int n, nrep; - long long cnt[NEVENT]; - long long stat[NEVENT+1][NREPMAX]; - long long sum[NEVENT+1]; - long long min[NEVENT+1]; - long long max[NEVENT+1]; -#ifdef USEPAPI - char event_s[PAPI_MAX_STR_LEN]; -#endif -#ifdef USEGETTIME - struct timespec t0, t1; -#endif - - if (argc != 3) - { - fprintf(stderr, "Usage: %s <n> <nrep>\n", argv[0]); - return 1; - } - - n = atoi(argv[1]); - nrep = atoi(argv[2]); - - if (nrep > NREPMAX) - { - fprintf(stderr, "nrep exceeds NREPMAX (%d>%d)\n", nrep, NREPMAX); - return 1; - } - - a = (complex_t *) malloc(n * n * sizeof(complex_t)); - b = (complex_t *) malloc(n * n * sizeof(complex_t)); - c = (complex_t *) malloc(n * n * sizeof(complex_t)); - - if (a == NULL || b == NULL || c == NULL) - { - fprintf(stderr, "malloc() failed\n"); - return 1; - } - -#ifdef USEPAPI - if (PAPI_start_counters(event, NEVENT) != PAPI_OK) - { - fprintf(stderr, "PAPI_start_counters() failed\n"); - //return 1; - } -#endif - -#ifdef USEBLIS - bli_init(); -#endif - - //------------------------------------------------------------------------------------------------ - // Main loop - //------------------------------------------------------------------------------------------------ - - for (k = 0; k < nrep; k++) - { - for (i = 0; i < n; i++) - for (j = 0; j < n; j++) - { - a[i*n+j] = 1.0 + 0.01 * i + 0.0001 * j; - b[i*n+j] = 2.0 + 0.01 * i + 0.0001 * j; - c[i*n+j] = 3.0 + 0.01 * i + 0.0001 * j; - } - alpha = 0.5; - beta = 0.6; - -#ifdef USEGETTIME - clock_gettime(CLOCK_PROCESS_CPUTIME_ID , &t0); -#endif -#ifdef USEPAPI - PAPI_read_counters(cnt, NEVENT); -#endif - - cblas_zgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans, n, n, n, &alpha, a, n, b, n, &beta ,c, n); - -#ifdef USEPAPI - PAPI_read_counters(cnt, NEVENT); -#endif -#ifdef USEGETTIME - clock_gettime(CLOCK_PROCESS_CPUTIME_ID , &t1); -#endif - -#ifdef USEPAPI - for (l = 0; l < NEVENT; l++) - stat[l][k] = cnt[l]; -#endif -#ifdef USEGETTIME - stat[NEVENT][k] = (long long) ((t1.tv_sec - t0.tv_sec) * 1e9 + (t1.tv_nsec - t0.tv_nsec)); -#endif - } - - //------------------------------------------------------------------------------------------------ - // Test output - //------------------------------------------------------------------------------------------------ -#if 0 - for (k = 0; k < n; k++) - { - for (l = 0; l < n; l++) - printf(" (%.5f,%.5f)", creal(a[k*n+l]), cimag(a[k*n+l])); - printf("\t"); - for (l = 0; l < n; l++) - printf(" (%.5f,%.5f)", creal(b[k*n+l]), cimag(b[k*n+l])); - printf("\t"); - for (l = 0; l < n; l++) - printf(" (%.5f,%.5f)", creal(c[k*n+l]), cimag(c[k*n+l])); - printf("\n"); - } -#endif - - //------------------------------------------------------------------------------------------------ - // Generate statistics - //------------------------------------------------------------------------------------------------ - - for (l = 0; l <= NEVENT; l++) - { - sum[l] = 0; - min[l] = -1; - max[l] = 0; - for (k = 1; k < nrep; k++) - { - sum[l] += stat[l][k]; - min[l] = (min[l] < 0 || stat[l][k] < min[l]) ? stat[l][k] : min[l]; - max[l] = (stat[l][k] > max[l]) ? stat[l][k] : max[l]; - } - } - - //------------------------------------------------------------------------------------------------ - // Print results - //------------------------------------------------------------------------------------------------ - -#if defined(USEOPENBLAS) - printf("# OpenBLAS\n"); - printf("# nthr=%s\n", getenv("OPENBLAS_NUM_THREADS")); -#elif defined (USEBLIS) - printf("# BLIS\n"); - printf("# nthr=%s\n", getenv("BLIS_NUM_THREADS")); -#endif - printf("# n=%d\n", n); - printf("# nrep=%d\n", nrep); - printf("#"); -#ifdef USEPAPI - for (l = 0; l < NEVENT; l++) - { - PAPI_event_code_to_name(event[l], event_s); - printf(" %s", event_s); - } -#endif -#ifdef USEGETTIME - printf(" time/ns\n"); -#endif - -#if defined(USEPAPI) || defined(USEGETTIME) - printf("%-15s: ", "cblas_zgemm"); - for (l = 0; l <= NEVENT; l++) - { - printf("\t%.3e %.3e %.3e", (double) sum[l] / (nrep - 1.), - (double) min[l], (double) max[l]); - } - printf("\n"); -#endif - - return 0; -} +#include "myxgemm.h" -- GitLab