From ef84d0224934f07d6d71ad37ddd2d89ef7987433 Mon Sep 17 00:00:00 2001 From: Christopher Haine <chaine@cray.com> Date: Wed, 11 Nov 2020 10:41:28 -0600 Subject: [PATCH] fixed non-squared transpose --- tests/check_layout.c | 28 +++++++++++++++++----------- transformation/transformation.c | 8 ++++---- 2 files changed, 21 insertions(+), 15 deletions(-) diff --git a/tests/check_layout.c b/tests/check_layout.c index 5fe3ab43..1f97bf7d 100644 --- a/tests/check_layout.c +++ b/tests/check_layout.c @@ -49,7 +49,8 @@ #include <errno.h> #include "maestro/logging.h" -#define N 10 +#define N 20 +#define M 40 #define ROWMAJ MSTRO_ATTR_CORE_CDO_LAYOUT_ORDER_ROWMAJOR #define COLMAJ MSTRO_ATTR_CORE_CDO_LAYOUT_ORDER_COLMAJOR @@ -73,7 +74,7 @@ CHEAT_DECLARE ( ) CHEAT_TEST(layout_attribute_works, - size_t data_count = N*N; + size_t data_count = N*M; int64_t bytes = data_count*sizeof(double); double* src_data = malloc(bytes); double* unpooled_data = malloc(bytes); @@ -91,13 +92,18 @@ CHEAT_TEST(layout_attribute_works, /* Layout information */ int64_t ndims, patt_src, patt_dst, elsz; - int64_t* dimsz; + int64_t* dimsz_src, *dimsz_dst; elsz = sizeof(double); ndims = 2; - dimsz = malloc(sizeof(int64_t)*ndims); - cheat_assert(dimsz != NULL); - dimsz[0] = N; - dimsz[1] = N; + dimsz_src = malloc(sizeof(int64_t)*ndims); + cheat_assert(dimsz_src != NULL); + dimsz_src[0] = N; + dimsz_src[1] = M; + dimsz_dst = malloc(sizeof(int64_t)*ndims); + cheat_assert(dimsz_dst != NULL); + dimsz_dst[0] = M; + dimsz_dst[1] = N; + patt_src = ROWMAJ; patt_dst = COLMAJ; @@ -117,7 +123,7 @@ CHEAT_TEST(layout_attribute_works, cheat_assert(MSTRO_OK == mstro_cdo_attribute_set(cdo_src, MSTRO_ATTR_CORE_CDO_LAYOUT_NDIMS, &ndims)); cheat_assert(MSTRO_OK == mstro_cdo_attribute_set(cdo_src, - MSTRO_ATTR_CORE_CDO_LAYOUT_DIMS_SIZE, dimsz)); + MSTRO_ATTR_CORE_CDO_LAYOUT_DIMS_SIZE, dimsz_src)); cheat_assert(MSTRO_OK == mstro_cdo_attribute_set(cdo_src, MSTRO_ATTR_CORE_CDO_LAYOUT_ORDER, &patt_src)); cheat_assert(MSTRO_OK == mstro_cdo_declaration_seal(cdo_src)); @@ -130,7 +136,7 @@ CHEAT_TEST(layout_attribute_works, cheat_assert(MSTRO_OK == mstro_cdo_attribute_set(cdo_dst, MSTRO_ATTR_CORE_CDO_LAYOUT_NDIMS, &ndims)); cheat_assert(MSTRO_OK == mstro_cdo_attribute_set(cdo_dst, - MSTRO_ATTR_CORE_CDO_LAYOUT_DIMS_SIZE, dimsz)); + MSTRO_ATTR_CORE_CDO_LAYOUT_DIMS_SIZE, dimsz_dst)); cheat_assert(MSTRO_OK == mstro_cdo_attribute_set(cdo_dst, MSTRO_ATTR_CORE_CDO_LAYOUT_ORDER, &patt_dst)); cheat_assert(MSTRO_OK == mstro_cdo_declaration_seal(cdo_dst)); @@ -153,7 +159,7 @@ CHEAT_TEST(layout_attribute_works, cheat_assert(MSTRO_OK == mstro_cdo_attribute_set(cdo_unpooled, MSTRO_ATTR_CORE_CDO_LAYOUT_NDIMS, &ndims)); cheat_assert(MSTRO_OK == mstro_cdo_attribute_set(cdo_unpooled, - MSTRO_ATTR_CORE_CDO_LAYOUT_DIMS_SIZE, dimsz)); + MSTRO_ATTR_CORE_CDO_LAYOUT_DIMS_SIZE, dimsz_dst)); cheat_assert(MSTRO_OK == mstro_cdo_attribute_set(cdo_unpooled, MSTRO_ATTR_CORE_CDO_LAYOUT_ORDER, &patt_dst)); cheat_assert(MSTRO_OK == mstro_cdo_declaration_seal(cdo_unpooled)); @@ -185,7 +191,7 @@ CHEAT_TEST(layout_attribute_works, cheat_assert(MSTRO_OK == mstro_cdo_attribute_set(cdo_unpooled_t, MSTRO_ATTR_CORE_CDO_LAYOUT_NDIMS, &ndims)); cheat_assert(MSTRO_OK == mstro_cdo_attribute_set(cdo_unpooled_t, - MSTRO_ATTR_CORE_CDO_LAYOUT_DIMS_SIZE, dimsz)); + MSTRO_ATTR_CORE_CDO_LAYOUT_DIMS_SIZE, dimsz_src)); cheat_assert(MSTRO_OK == mstro_cdo_attribute_set(cdo_unpooled_t, MSTRO_ATTR_CORE_CDO_LAYOUT_ORDER, &patt_src)); cheat_assert(MSTRO_OK == mstro_cdo_declaration_seal(cdo_unpooled_t)); diff --git a/transformation/transformation.c b/transformation/transformation.c index ced4eb66..fe99469d 100644 --- a/transformation/transformation.c +++ b/transformation/transformation.c @@ -198,12 +198,12 @@ mstro_transform_transpose( int64_t src_d1 = ((const int64_t*)src_dimsz)[1]; int64_t dst_d0 = ((const int64_t*)dst_dimsz)[0]; int64_t dst_d1 = ((const int64_t*)dst_dimsz)[1]; - +/* if(! (src_d0==src_d1 && src_d0==dst_d1 && dst_d0==dst_d1)) { DEBUG("Can only handle square matrices\n"); return MSTRO_NOMATCH; } - +*/ /* actual transposition */ /* FIXME: should be using mamba API */ { @@ -212,7 +212,7 @@ mstro_transform_transpose( int64_t eltsize; int i,j; int64_t k; - + src_ptr = m_src->allocation->ptr; dst_ptr = m_dst->allocation->ptr; @@ -221,7 +221,7 @@ mstro_transform_transpose( for (j=0; j<dst_d0; j++) for (k=0; k<eltsize; k++) dst_ptr[i*dst_d0*eltsize + j*eltsize +k] - = src_ptr[j*dst_d0*eltsize + i*eltsize+k]; + = src_ptr[j*dst_d1*eltsize + i*eltsize+k]; } return MSTRO_OK; -- GitLab