diff --git a/src/mygemm/Makefile b/src/mygemm/Makefile
index 946cbbe978f287b58f303185e2543d957c2218f2..85811df6f31fdc255dd3086cc4324c48682c3f55 100644
--- a/src/mygemm/Makefile
+++ b/src/mygemm/Makefile
@@ -41,7 +41,7 @@ all: $(TARGETS)
 %.s: $(SRCDIR)/%.c
 	$(CC) $(CFLAGS) -S $< -o $@
 
-%.x: $(SRCDIR)/%.c
+%.x: $(SRCDIR)/%.c $(SRCDIR)/myxgemm.h  # header is a prerequisite so editing it rebuilds every %.x; $< below is still the .c file (first prerequisite)
 	$(CC) $(CFLAGS) $< -o $@ $(LIB)
 
 clean:
diff --git a/src/mygemm/mydgemm.c b/src/mygemm/mydgemm.c
index 7e3081d6e5b5fa440461cb34e030da4860ad51b8..6a66547bb5ac67ead3f9c79cc93d8ae9e0109199 100644
--- a/src/mygemm/mydgemm.c
+++ b/src/mygemm/mydgemm.c
@@ -2,208 +2,12 @@
 // DGEMM benchmark
 //==================================================================================================
 
-#include <stdio.h>
-#include <stdlib.h>
-#ifdef _OPENMP
-#include <omp.h>
-#endif
+typedef double my_t;  // real double-precision element type for this variant; presumably consumed by myxgemm.h -- confirm there
 
-#ifdef USEGETTIME
-#include <time.h>
-#endif
+#define MYBLIS_T BLIS_DOUBLE  // BLIS type constant matching my_t; NOTE(review): its use lives in myxgemm.h, not visible here
+#define MYXGEMM cblas_dgemm   // CBLAS GEMM routine selected by this variant
 
-#ifdef USEPAPI
-#include "papi.h"
-#endif
+#define xstr(a) str(a)  // two-step stringify: macro-expands a first, e.g. xstr(MYXGEMM) yields "cblas_dgemm"
+#define str(a) #a       // stringifies a exactly as written, without expanding it
 
-#ifdef USEOPENBLAS
-#include "cblas.h"
-#endif
-
-#ifdef USEBLIS
-#define BLIS_ENABLE_CBLAS 1
-#include "blis/blis.h"
-#endif
-
-#ifdef USEPAPI
-#define NEVENT 4
-int event[NEVENT] = { PAPI_REF_CYC, PAPI_L1_DCM, PAPI_L2_DCM, PAPI_TOT_INS };
-//int event[NEVENT] = { PAPI_TOT_CYC, PAPI_FP_INS, PAPI_L1_DCM };
-#else
-#define NEVENT 0
-#endif
-
-#define NREPMAX 10000
-
-double *a;
-double *b;
-double *c;
-double alpha;
-double beta;
-
-int main(int argc, char* argv[])
-{
-  int i, j, k, l;
-  int n, nrep;
-  long long cnt[NEVENT];
-  long long stat[NEVENT+1][NREPMAX];
-  long long sum[NEVENT+1];
-  long long min[NEVENT+1];
-  long long max[NEVENT+1];
-#ifdef USEPAPI
-  char event_s[PAPI_MAX_STR_LEN];
-#endif
-#ifdef USEGETTIME
-  struct timespec t0, t1;
-#endif
-
-  if (argc != 3)
-  {
-    fprintf(stderr, "Usage: %s <n> <nrep>\n", argv[0]);
-    return 1;
-  }
-
-  n = atoi(argv[1]);
-  nrep = atoi(argv[2]);
-
-  if (nrep > NREPMAX)
-  {
-    fprintf(stderr, "nrep exceeds NREPMAX (%d>%d)\n", nrep, NREPMAX);
-    return 1;
-  }
-
-  a = (double *) malloc(n * n * sizeof(double));
-  b = (double *) malloc(n * n * sizeof(double));
-  c = (double *) malloc(n * n * sizeof(double));
-
-  if (a == NULL || b == NULL || c == NULL)
-  {
-    fprintf(stderr, "malloc() failed\n");
-    return 1;
-  }
-
-#ifdef USEPAPI
-  if (PAPI_start_counters(event, NEVENT) != PAPI_OK)
-  {
-    fprintf(stderr, "PAPI_start_counters() failed\n");
-    //return 1;
-  }
-#endif
-
-#ifdef USEBLIS
-  bli_init();
-#endif
-
-  //------------------------------------------------------------------------------------------------
-  // Main loop
-  //------------------------------------------------------------------------------------------------
-
-  for (k = 0; k < nrep; k++)
-  {
-    for (i = 0; i < n; i++)
-      for (j = 0; j < n; j++)
-      {
-        a[i*n+j] = 1.0 + 0.01 * i + 0.0001 * j;
-        b[i*n+j] = 2.0 + 0.01 * i + 0.0001 * j;
-        c[i*n+j] = 3.0 + 0.01 * i + 0.0001 * j;
-      }
-    alpha = 0.5;
-    beta  = 0.6;
-
-#ifdef USEGETTIME
-    clock_gettime(CLOCK_PROCESS_CPUTIME_ID , &t0);
-#endif
-#ifdef USEPAPI
-    PAPI_read_counters(cnt, NEVENT);
-#endif
-
-    cblas_dgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans, n, n, n, alpha, a, n, b, n, beta ,c, n);
-
-#ifdef USEPAPI
-    PAPI_read_counters(cnt, NEVENT);
-#endif
-#ifdef USEGETTIME
-    clock_gettime(CLOCK_PROCESS_CPUTIME_ID , &t1);
-#endif
-
-#ifdef USEPAPI
-    for (l = 0; l < NEVENT; l++)
-      stat[l][k] = cnt[l];
-#endif
-#ifdef USEGETTIME
-    stat[NEVENT][k] = (long long) ((t1.tv_sec - t0.tv_sec) * 1e9 + (t1.tv_nsec - t0.tv_nsec));
-#endif
-  }
-
-  //------------------------------------------------------------------------------------------------
-  // Test output
-  //------------------------------------------------------------------------------------------------
-#if 0
-  for (k = 0; k < n; k++)
-  {
-    for (l = 0; l < n; l++)
-      printf(" (%.5f,%.5f)", creal(a[k*n+l]), cimag(a[k*n+l]));
-    printf("\t");
-    for (l = 0; l < n; l++)
-      printf(" (%.5f,%.5f)", creal(b[k*n+l]), cimag(b[k*n+l]));
-    printf("\t");
-    for (l = 0; l < n; l++)
-      printf(" (%.5f,%.5f)", creal(c[k*n+l]), cimag(c[k*n+l]));
-    printf("\n");
-  }
-#endif
-
-  //------------------------------------------------------------------------------------------------
-  // Generate statistics
-  //------------------------------------------------------------------------------------------------
-
-  for (l = 0; l <= NEVENT; l++)
-  {
-    sum[l] = 0;
-    min[l] = -1;
-    max[l] = 0;
-    for (k = 1; k < nrep; k++)
-    {
-      sum[l] += stat[l][k];
-      min[l]  = (min[l] < 0 || stat[l][k] < min[l]) ? stat[l][k] : min[l];
-      max[l]  = (stat[l][k] > max[l]) ? stat[l][k] : max[l];
-    }
-  }
-
-  //------------------------------------------------------------------------------------------------
-  // Print results
-  //------------------------------------------------------------------------------------------------
-
-#if defined(USEOPENBLAS)
-  printf("# OpenBLAS\n");
-  printf("# nthr=%s\n", getenv("OPENBLAS_NUM_THREADS"));
-#elif defined (USEBLIS)
-  printf("# BLIS\n");
-  printf("# nthr=%s\n", getenv("BLIS_NUM_THREADS"));
-#endif
-  printf("# n=%d\n",    n);
-  printf("# nrep=%d\n", nrep);
-  printf("#");
-#ifdef USEPAPI
-  for (l = 0; l < NEVENT; l++)
-  {
-    PAPI_event_code_to_name(event[l], event_s);
-    printf(" %s", event_s);
-  }
-#endif
-#ifdef USEGETTIME
-  printf(" time/ns\n");
-#endif
-
-#if defined(USEPAPI) || defined(USEGETTIME)
-  printf("%-15s: ", "cblas_dgemm");
-  for (l = 0; l <= NEVENT; l++)
-  {
-    printf("\t%.3e %.3e %.3e", (double) sum[l] / (nrep - 1.),
-                               (double) min[l], (double) max[l]);
-  }
-  printf("\n");
-#endif
-
-  return 0;
-}
+#include "myxgemm.h"
diff --git a/src/mygemm/mysgemm.c b/src/mygemm/mysgemm.c
index d7cc22e3e32ca87d669227ef8fc51d30c104bfd9..18ff7d2aeb9213b755c4b1c05fbd51c8f4635f64 100644
--- a/src/mygemm/mysgemm.c
+++ b/src/mygemm/mysgemm.c
@@ -2,209 +2,12 @@
 // SGEMM benchmark
 //==================================================================================================
 
-#include <stdio.h>
-#include <stdlib.h>
-#ifdef _OPENMP
-#include <omp.h>
-#endif
+typedef float my_t;  // real single-precision element type for this variant; presumably consumed by myxgemm.h -- confirm there
 
-#ifdef USEGETTIME
-#include <time.h>
-#endif
+#define MYBLIS_T BLIS_FLOAT  // BLIS type constant matching my_t; NOTE(review): its use lives in myxgemm.h, not visible here
+#define MYXGEMM cblas_sgemm  // CBLAS GEMM routine selected by this variant
 
-#ifdef USEPAPI
-#include "papi.h"
-#endif
-
-#ifdef USEOPENBLAS
-#include "cblas.h"
-#endif
-
-#ifdef USEBLIS
-#define BLIS_ENABLE_CBLAS 1
-#include "blis/blis.h"
-#endif
-
-#ifdef USEPAPI
-#define NEVENT 4
-int event[NEVENT] = { PAPI_REF_CYC, PAPI_L1_DCM, PAPI_L2_DCM, PAPI_TOT_INS };
-//int event[NEVENT] = { PAPI_TOT_CYC, PAPI_FP_INS, PAPI_L1_DCM };
-#else
-#define NEVENT 0
-#endif
-
-#define NREPMAX 10000
-
-float *a;
-float *b;
-float *c;
-float alpha;
-float beta;
-
-int main(int argc, char* argv[])
-{
-  int i, j, k, l;
-  int n, nrep;
-  long long cnt[NEVENT];
-  long long stat[NEVENT+1][NREPMAX];
-  long long sum[NEVENT+1];
-  long long min[NEVENT+1];
-  long long max[NEVENT+1];
-#ifdef USEPAPI
-  char event_s[PAPI_MAX_STR_LEN];
-#endif
-#ifdef USEGETTIME
-  struct timespec t0, t1;
-#endif
-
-  if (argc != 3)
-  {
-    fprintf(stderr, "Usage: %s <n> <nrep>\n", argv[0]);
-    return 1;
-  }
-
-  n = atoi(argv[1]);
-  nrep = atoi(argv[2]);
-
-  if (nrep > NREPMAX)
-  {
-    fprintf(stderr, "nrep exceeds NREPMAX (%d>%d)\n", nrep, NREPMAX);
-    return 1;
-  }
-
-  a = (float *) malloc(n * n * sizeof(float));
-  b = (float *) malloc(n * n * sizeof(float));
-  c = (float *) malloc(n * n * sizeof(float));
-
-  if (a == NULL || b == NULL || c == NULL)
-  {
-    fprintf(stderr, "malloc() failed\n");
-    return 1;
-  }
-
-#ifdef USEPAPI
-  if (PAPI_start_counters(event, NEVENT) != PAPI_OK)
-  {
-    fprintf(stderr, "PAPI_start_counters() failed\n");
-    //return 1;
-  }
-#endif
-
-#ifdef USEBLIS
-  bli_init();
-#endif
-
-  //------------------------------------------------------------------------------------------------
-  // Main loop
-  //------------------------------------------------------------------------------------------------
-
-  for (k = 0; k < nrep; k++)
-  {
-    for (i = 0; i < n; i++)
-      for (j = 0; j < n; j++)
-      {
-        a[i*n+j] = 1.0 + 0.01 * i + 0.0001 * j;
-        b[i*n+j] = 2.0 + 0.01 * i + 0.0001 * j;
-        c[i*n+j] = 3.0 + 0.01 * i + 0.0001 * j;
-      }
-    alpha = 0.5;
-    beta  = 0.6;
-
-#ifdef USEGETTIME
-    clock_gettime(CLOCK_PROCESS_CPUTIME_ID , &t0);
-#endif
-#ifdef USEPAPI
-    PAPI_read_counters(cnt, NEVENT);
-#endif
-
-    cblas_sgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans, n, n, n, alpha, a, n, b, n, beta ,c, n);
-
-#ifdef USEPAPI
-    PAPI_read_counters(cnt, NEVENT);
-#endif
-#ifdef USEGETTIME
-    clock_gettime(CLOCK_PROCESS_CPUTIME_ID , &t1);
-#endif
-
-#ifdef USEPAPI
-    for (l = 0; l < NEVENT; l++)
-      stat[l][k] = cnt[l];
-#endif
-#ifdef USEGETTIME
-    stat[NEVENT][k] = (long long) ((t1.tv_sec - t0.tv_sec) * 1e9 + (t1.tv_nsec - t0.tv_nsec));
-#endif
-  }
-
-  //------------------------------------------------------------------------------------------------
-  // Test output
-  //------------------------------------------------------------------------------------------------
-#if 0
-  for (k = 0; k < n; k++)
-  {
-    for (l = 0; l < n; l++)
-      printf(" (%.5f,%.5f)", creal(a[k*n+l]), cimag(a[k*n+l]));
-    printf("\t");
-    for (l = 0; l < n; l++)
-      printf(" (%.5f,%.5f)", creal(b[k*n+l]), cimag(b[k*n+l]));
-    printf("\t");
-    for (l = 0; l < n; l++)
-      printf(" (%.5f,%.5f)", creal(c[k*n+l]), cimag(c[k*n+l]));
-    printf("\n");
-  }
-#endif
-
-  //------------------------------------------------------------------------------------------------
-  // Generate statistics
-  //------------------------------------------------------------------------------------------------
-
-  for (l = 0; l <= NEVENT; l++)
-  {
-    sum[l] = 0;
-    min[l] = -1;
-    max[l] = 0;
-    for (k = 1; k < nrep; k++)
-    {
-      sum[l] += stat[l][k];
-      min[l]  = (min[l] < 0 || stat[l][k] < min[l]) ? stat[l][k] : min[l];
-      max[l]  = (stat[l][k] > max[l]) ? stat[l][k] : max[l];
-    }
-  }
-
-  //------------------------------------------------------------------------------------------------
-  // Print results
-  //------------------------------------------------------------------------------------------------
-
-#if defined(USEOPENBLAS)
-  printf("# OpenBLAS\n");
-  printf("# nthr=%s\n", getenv("OPENBLAS_NUM_THREADS"));
-#elif defined (USEBLIS)
-  printf("# BLIS\n");
-  printf("# nthr=%s\n", getenv("BLIS_NUM_THREADS"));
-#endif
-  printf("# n=%d\n",    n);
-  printf("# nrep=%d\n", nrep);
-  printf("#");
-#ifdef USEPAPI
-  for (l = 0; l < NEVENT; l++)
-  {
-    PAPI_event_code_to_name(event[l], event_s);
-    printf(" %s", event_s);
-  }
-#endif
-#ifdef USEGETTIME
-  printf(" time/ns\n");
-#endif
-
-#if defined(USEPAPI) || defined(USEGETTIME)
-  printf("%-15s: ", "cblas_sgemm");
-  for (l = 0; l <= NEVENT; l++)
-  {
-    printf("\t%.3e %.3e %.3e", (float) sum[l] / (nrep - 1.),
-                               (float) min[l], (float) max[l]);
-  }
-  printf("\n");
-#endif
-
-  return 0;
-}
+#define xstr(a) str(a)  // two-step stringify: macro-expands a first, e.g. xstr(MYXGEMM) yields "cblas_sgemm"
+#define str(a) #a       // stringifies a exactly as written, without expanding it
 
+#include "myxgemm.h"
diff --git a/src/mygemm/myzgemm.c b/src/mygemm/myzgemm.c
index 145b0479c6669985664e12a3c7bf1ff7ed5f2124..aea4ec6702a21c60757032f68202b3a491336b8a 100644
--- a/src/mygemm/myzgemm.c
+++ b/src/mygemm/myzgemm.c
@@ -2,211 +2,16 @@
 // ZGEMM benchmark
 //==================================================================================================
 
-#include <stdio.h>
-#include <stdlib.h>
 #include <complex.h>
-#ifdef _OPENMP
-#include <omp.h>
-#endif
 
-#ifdef USEGETTIME
-#include <time.h>
-#endif
+typedef double complex my_t;  // double-precision complex element type for this variant; presumably consumed by myxgemm.h -- confirm there
 
-#ifdef USEPAPI
-#include "papi.h"
-#endif
+#define CMPLX  // marks the complex variant. NOTE(review): C11 <complex.h> already defines a function-like CMPLX(x, y); this redefinition may be diagnosed -- consider a project-specific name
 
-#ifdef USEOPENBLAS
-#include "cblas.h"
-#endif
+#define MYBLIS_T BLIS_DCOMPLEX  // BLIS type constant matching my_t; NOTE(review): its use lives in myxgemm.h, not visible here
+#define MYXGEMM cblas_zgemm     // CBLAS GEMM routine selected by this variant
 
-#ifdef USEBLIS
-#define BLIS_ENABLE_CBLAS 1
-#include "blis/blis.h"
-#endif
+#define xstr(a) str(a)  // two-step stringify: macro-expands a first, e.g. xstr(MYXGEMM) yields "cblas_zgemm"
+#define str(a) #a       // stringifies a exactly as written, without expanding it
 
-#ifdef USEPAPI
-#define NEVENT 4
-int event[NEVENT] = { PAPI_REF_CYC, PAPI_L1_DCM, PAPI_L2_DCM, PAPI_TOT_INS };
-//int event[NEVENT] = { PAPI_TOT_CYC, PAPI_FP_INS, PAPI_L1_DCM };
-#else
-#define NEVENT 0
-#endif
-
-#define NREPMAX 10000
-
-typedef double complex complex_t;
-
-complex_t *a;
-complex_t *b;
-complex_t *c;
-complex_t alpha;
-complex_t beta;
-
-int main(int argc, char* argv[])
-{
-  int i, j, k, l;
-  int n, nrep;
-  long long cnt[NEVENT];
-  long long stat[NEVENT+1][NREPMAX];
-  long long sum[NEVENT+1];
-  long long min[NEVENT+1];
-  long long max[NEVENT+1];
-#ifdef USEPAPI
-  char event_s[PAPI_MAX_STR_LEN];
-#endif
-#ifdef USEGETTIME
-  struct timespec t0, t1;
-#endif
-
-  if (argc != 3)
-  {
-    fprintf(stderr, "Usage: %s <n> <nrep>\n", argv[0]);
-    return 1;
-  }
-
-  n = atoi(argv[1]);
-  nrep = atoi(argv[2]);
-
-  if (nrep > NREPMAX)
-  {
-    fprintf(stderr, "nrep exceeds NREPMAX (%d>%d)\n", nrep, NREPMAX);
-    return 1;
-  }
-
-  a = (complex_t *) malloc(n * n * sizeof(complex_t));
-  b = (complex_t *) malloc(n * n * sizeof(complex_t));
-  c = (complex_t *) malloc(n * n * sizeof(complex_t));
-
-  if (a == NULL || b == NULL || c == NULL)
-  {
-    fprintf(stderr, "malloc() failed\n");
-    return 1;
-  }
-
-#ifdef USEPAPI
-  if (PAPI_start_counters(event, NEVENT) != PAPI_OK)
-  {
-    fprintf(stderr, "PAPI_start_counters() failed\n");
-    //return 1;
-  }
-#endif
-
-#ifdef USEBLIS
-  bli_init();
-#endif
-
-  //------------------------------------------------------------------------------------------------
-  // Main loop
-  //------------------------------------------------------------------------------------------------
-
-  for (k = 0; k < nrep; k++)
-  {
-    for (i = 0; i < n; i++)
-      for (j = 0; j < n; j++)
-      {
-        a[i*n+j] = 1.0 + 0.01 * i + 0.0001 * j;
-        b[i*n+j] = 2.0 + 0.01 * i + 0.0001 * j;
-        c[i*n+j] = 3.0 + 0.01 * i + 0.0001 * j;
-      }
-    alpha = 0.5;
-    beta  = 0.6;
-
-#ifdef USEGETTIME
-    clock_gettime(CLOCK_PROCESS_CPUTIME_ID , &t0);
-#endif
-#ifdef USEPAPI
-    PAPI_read_counters(cnt, NEVENT);
-#endif
-
-    cblas_zgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans, n, n, n, &alpha, a, n, b, n, &beta ,c, n);
-
-#ifdef USEPAPI
-    PAPI_read_counters(cnt, NEVENT);
-#endif
-#ifdef USEGETTIME
-    clock_gettime(CLOCK_PROCESS_CPUTIME_ID , &t1);
-#endif
-
-#ifdef USEPAPI
-    for (l = 0; l < NEVENT; l++)
-      stat[l][k] = cnt[l];
-#endif
-#ifdef USEGETTIME
-    stat[NEVENT][k] = (long long) ((t1.tv_sec - t0.tv_sec) * 1e9 + (t1.tv_nsec - t0.tv_nsec));
-#endif
-  }
-
-  //------------------------------------------------------------------------------------------------
-  // Test output
-  //------------------------------------------------------------------------------------------------
-#if 0
-  for (k = 0; k < n; k++)
-  {
-    for (l = 0; l < n; l++)
-      printf(" (%.5f,%.5f)", creal(a[k*n+l]), cimag(a[k*n+l]));
-    printf("\t");
-    for (l = 0; l < n; l++)
-      printf(" (%.5f,%.5f)", creal(b[k*n+l]), cimag(b[k*n+l]));
-    printf("\t");
-    for (l = 0; l < n; l++)
-      printf(" (%.5f,%.5f)", creal(c[k*n+l]), cimag(c[k*n+l]));
-    printf("\n");
-  }
-#endif
-
-  //------------------------------------------------------------------------------------------------
-  // Generate statistics
-  //------------------------------------------------------------------------------------------------
-
-  for (l = 0; l <= NEVENT; l++)
-  {
-    sum[l] = 0;
-    min[l] = -1;
-    max[l] = 0;
-    for (k = 1; k < nrep; k++)
-    {
-      sum[l] += stat[l][k];
-      min[l]  = (min[l] < 0 || stat[l][k] < min[l]) ? stat[l][k] : min[l];
-      max[l]  = (stat[l][k] > max[l]) ? stat[l][k] : max[l];
-    }
-  }
-
-  //------------------------------------------------------------------------------------------------
-  // Print results
-  //------------------------------------------------------------------------------------------------
-
-#if defined(USEOPENBLAS)
-  printf("# OpenBLAS\n");
-  printf("# nthr=%s\n", getenv("OPENBLAS_NUM_THREADS"));
-#elif defined (USEBLIS)
-  printf("# BLIS\n");
-  printf("# nthr=%s\n", getenv("BLIS_NUM_THREADS"));
-#endif
-  printf("# n=%d\n",    n);
-  printf("# nrep=%d\n", nrep);
-  printf("#");
-#ifdef USEPAPI
-  for (l = 0; l < NEVENT; l++)
-  {
-    PAPI_event_code_to_name(event[l], event_s);
-    printf(" %s", event_s);
-  }
-#endif
-#ifdef USEGETTIME
-  printf(" time/ns\n");
-#endif
-
-#if defined(USEPAPI) || defined(USEGETTIME)
-  printf("%-15s: ", "cblas_zgemm");
-  for (l = 0; l <= NEVENT; l++)
-  {
-    printf("\t%.3e %.3e %.3e", (double) sum[l] / (nrep - 1.),
-                               (double) min[l], (double) max[l]);
-  }
-  printf("\n");
-#endif
-
-  return 0;
-}
+#include "myxgemm.h"