From ef84d0224934f07d6d71ad37ddd2d89ef7987433 Mon Sep 17 00:00:00 2001
From: Christopher Haine <chaine@cray.com>
Date: Wed, 11 Nov 2020 10:41:28 -0600
Subject: [PATCH] Fix transpose of non-square matrices

---
 tests/check_layout.c            | 28 +++++++++++++++++-----------
 transformation/transformation.c |  8 ++++----
 2 files changed, 21 insertions(+), 15 deletions(-)

diff --git a/tests/check_layout.c b/tests/check_layout.c
index 5fe3ab43..1f97bf7d 100644
--- a/tests/check_layout.c
+++ b/tests/check_layout.c
@@ -49,7 +49,8 @@
 #include <errno.h>
 #include "maestro/logging.h"
 
-#define N 10
+#define N 20
+#define M 40
 #define ROWMAJ MSTRO_ATTR_CORE_CDO_LAYOUT_ORDER_ROWMAJOR
 #define COLMAJ MSTRO_ATTR_CORE_CDO_LAYOUT_ORDER_COLMAJOR
 
@@ -73,7 +74,7 @@ CHEAT_DECLARE (
                )
 
 CHEAT_TEST(layout_attribute_works,
-  size_t data_count = N*N;
+  size_t data_count = N*M;
   int64_t bytes = data_count*sizeof(double);
   double* src_data = malloc(bytes);
   double* unpooled_data = malloc(bytes);
@@ -91,13 +92,18 @@ CHEAT_TEST(layout_attribute_works,
 
   /* Layout information */
   int64_t ndims, patt_src, patt_dst, elsz;
-  int64_t* dimsz;
+  int64_t* dimsz_src, *dimsz_dst;
   elsz = sizeof(double);
   ndims = 2;
-  dimsz = malloc(sizeof(int64_t)*ndims);
-  cheat_assert(dimsz != NULL);
-  dimsz[0] = N;
-  dimsz[1] = N;
+  dimsz_src = malloc(sizeof(int64_t)*ndims);
+  cheat_assert(dimsz_src != NULL);
+  dimsz_src[0] = N;
+  dimsz_src[1] = M;
+  dimsz_dst = malloc(sizeof(int64_t)*ndims);
+  cheat_assert(dimsz_dst != NULL);
+  dimsz_dst[0] = M;
+  dimsz_dst[1] = N;
+
   patt_src = ROWMAJ;
   patt_dst = COLMAJ;
 
@@ -117,7 +123,7 @@ CHEAT_TEST(layout_attribute_works,
   cheat_assert(MSTRO_OK == mstro_cdo_attribute_set(cdo_src,
 			  MSTRO_ATTR_CORE_CDO_LAYOUT_NDIMS, &ndims));
   cheat_assert(MSTRO_OK == mstro_cdo_attribute_set(cdo_src,
-			  MSTRO_ATTR_CORE_CDO_LAYOUT_DIMS_SIZE, dimsz));
+			  MSTRO_ATTR_CORE_CDO_LAYOUT_DIMS_SIZE, dimsz_src));
   cheat_assert(MSTRO_OK == mstro_cdo_attribute_set(cdo_src,
 			  MSTRO_ATTR_CORE_CDO_LAYOUT_ORDER, &patt_src));
   cheat_assert(MSTRO_OK == mstro_cdo_declaration_seal(cdo_src));
@@ -130,7 +136,7 @@ CHEAT_TEST(layout_attribute_works,
   cheat_assert(MSTRO_OK == mstro_cdo_attribute_set(cdo_dst,
 			  MSTRO_ATTR_CORE_CDO_LAYOUT_NDIMS, &ndims));
   cheat_assert(MSTRO_OK == mstro_cdo_attribute_set(cdo_dst,
-			  MSTRO_ATTR_CORE_CDO_LAYOUT_DIMS_SIZE, dimsz));
+			  MSTRO_ATTR_CORE_CDO_LAYOUT_DIMS_SIZE, dimsz_dst));
   cheat_assert(MSTRO_OK == mstro_cdo_attribute_set(cdo_dst,
 			  MSTRO_ATTR_CORE_CDO_LAYOUT_ORDER, &patt_dst));
   cheat_assert(MSTRO_OK == mstro_cdo_declaration_seal(cdo_dst));
@@ -153,7 +159,7 @@ CHEAT_TEST(layout_attribute_works,
   cheat_assert(MSTRO_OK == mstro_cdo_attribute_set(cdo_unpooled,
 			  MSTRO_ATTR_CORE_CDO_LAYOUT_NDIMS, &ndims));
   cheat_assert(MSTRO_OK == mstro_cdo_attribute_set(cdo_unpooled,
-			  MSTRO_ATTR_CORE_CDO_LAYOUT_DIMS_SIZE, dimsz));
+			  MSTRO_ATTR_CORE_CDO_LAYOUT_DIMS_SIZE, dimsz_dst));
   cheat_assert(MSTRO_OK == mstro_cdo_attribute_set(cdo_unpooled,
 			  MSTRO_ATTR_CORE_CDO_LAYOUT_ORDER, &patt_dst));
   cheat_assert(MSTRO_OK == mstro_cdo_declaration_seal(cdo_unpooled));
@@ -185,7 +191,7 @@ CHEAT_TEST(layout_attribute_works,
   cheat_assert(MSTRO_OK == mstro_cdo_attribute_set(cdo_unpooled_t, 
 			  MSTRO_ATTR_CORE_CDO_LAYOUT_NDIMS, &ndims)); 
   cheat_assert(MSTRO_OK == mstro_cdo_attribute_set(cdo_unpooled_t, 
-			  MSTRO_ATTR_CORE_CDO_LAYOUT_DIMS_SIZE, dimsz)); 
+			  MSTRO_ATTR_CORE_CDO_LAYOUT_DIMS_SIZE, dimsz_src)); 
   cheat_assert(MSTRO_OK == mstro_cdo_attribute_set(cdo_unpooled_t, 
 			  MSTRO_ATTR_CORE_CDO_LAYOUT_ORDER, &patt_src)); 
   cheat_assert(MSTRO_OK == mstro_cdo_declaration_seal(cdo_unpooled_t));
diff --git a/transformation/transformation.c b/transformation/transformation.c
index ced4eb66..fe99469d 100644
--- a/transformation/transformation.c
+++ b/transformation/transformation.c
@@ -198,12 +198,12 @@ mstro_transform_transpose(
   int64_t src_d1 = ((const int64_t*)src_dimsz)[1];
   int64_t dst_d0 = ((const int64_t*)dst_dimsz)[0];
   int64_t dst_d1 = ((const int64_t*)dst_dimsz)[1];
-
+/*
   if(! (src_d0==src_d1 && src_d0==dst_d1 && dst_d0==dst_d1)) {
     DEBUG("Can only handle square matrices\n");
     return MSTRO_NOMATCH;
   }
-
+*/
   /* actual transposition */
   /* FIXME: should be using mamba API */
   {
@@ -212,7 +212,7 @@ mstro_transform_transpose(
   int64_t eltsize;
   int i,j;
   int64_t k;
-  
+
   src_ptr = m_src->allocation->ptr;
   dst_ptr = m_dst->allocation->ptr;
 
@@ -221,7 +221,7 @@ mstro_transform_transpose(
     for (j=0; j<dst_d0; j++)
       for (k=0; k<eltsize; k++)
         dst_ptr[i*dst_d0*eltsize + j*eltsize +k]
-            = src_ptr[j*dst_d0*eltsize + i*eltsize+k];
+            = src_ptr[j*dst_d1*eltsize + i*eltsize+k];
   
   }
   return MSTRO_OK;
-- 
GitLab