Skip to content
Snippets Groups Projects
Commit ef84d022 authored by Christopher Haine
Browse files

fixed non-squared transpose

parent ba95ee29
Branches
Tags
2 merge requests: !3 "Jsc ci update", !2 "update JSC-CI branch to devel"
Pipeline #51768 failed
...@@ -49,7 +49,8 @@ ...@@ -49,7 +49,8 @@
#include <errno.h> #include <errno.h>
#include "maestro/logging.h" #include "maestro/logging.h"
#define N 10 #define N 20
#define M 40
#define ROWMAJ MSTRO_ATTR_CORE_CDO_LAYOUT_ORDER_ROWMAJOR #define ROWMAJ MSTRO_ATTR_CORE_CDO_LAYOUT_ORDER_ROWMAJOR
#define COLMAJ MSTRO_ATTR_CORE_CDO_LAYOUT_ORDER_COLMAJOR #define COLMAJ MSTRO_ATTR_CORE_CDO_LAYOUT_ORDER_COLMAJOR
...@@ -73,7 +74,7 @@ CHEAT_DECLARE ( ...@@ -73,7 +74,7 @@ CHEAT_DECLARE (
) )
CHEAT_TEST(layout_attribute_works, CHEAT_TEST(layout_attribute_works,
size_t data_count = N*N; size_t data_count = N*M;
int64_t bytes = data_count*sizeof(double); int64_t bytes = data_count*sizeof(double);
double* src_data = malloc(bytes); double* src_data = malloc(bytes);
double* unpooled_data = malloc(bytes); double* unpooled_data = malloc(bytes);
...@@ -91,13 +92,18 @@ CHEAT_TEST(layout_attribute_works, ...@@ -91,13 +92,18 @@ CHEAT_TEST(layout_attribute_works,
/* Layout information */ /* Layout information */
int64_t ndims, patt_src, patt_dst, elsz; int64_t ndims, patt_src, patt_dst, elsz;
int64_t* dimsz; int64_t* dimsz_src, *dimsz_dst;
elsz = sizeof(double); elsz = sizeof(double);
ndims = 2; ndims = 2;
dimsz = malloc(sizeof(int64_t)*ndims); dimsz_src = malloc(sizeof(int64_t)*ndims);
cheat_assert(dimsz != NULL); cheat_assert(dimsz_src != NULL);
dimsz[0] = N; dimsz_src[0] = N;
dimsz[1] = N; dimsz_src[1] = M;
dimsz_dst = malloc(sizeof(int64_t)*ndims);
cheat_assert(dimsz_dst != NULL);
dimsz_dst[0] = M;
dimsz_dst[1] = N;
patt_src = ROWMAJ; patt_src = ROWMAJ;
patt_dst = COLMAJ; patt_dst = COLMAJ;
...@@ -117,7 +123,7 @@ CHEAT_TEST(layout_attribute_works, ...@@ -117,7 +123,7 @@ CHEAT_TEST(layout_attribute_works,
cheat_assert(MSTRO_OK == mstro_cdo_attribute_set(cdo_src, cheat_assert(MSTRO_OK == mstro_cdo_attribute_set(cdo_src,
MSTRO_ATTR_CORE_CDO_LAYOUT_NDIMS, &ndims)); MSTRO_ATTR_CORE_CDO_LAYOUT_NDIMS, &ndims));
cheat_assert(MSTRO_OK == mstro_cdo_attribute_set(cdo_src, cheat_assert(MSTRO_OK == mstro_cdo_attribute_set(cdo_src,
MSTRO_ATTR_CORE_CDO_LAYOUT_DIMS_SIZE, dimsz)); MSTRO_ATTR_CORE_CDO_LAYOUT_DIMS_SIZE, dimsz_src));
cheat_assert(MSTRO_OK == mstro_cdo_attribute_set(cdo_src, cheat_assert(MSTRO_OK == mstro_cdo_attribute_set(cdo_src,
MSTRO_ATTR_CORE_CDO_LAYOUT_ORDER, &patt_src)); MSTRO_ATTR_CORE_CDO_LAYOUT_ORDER, &patt_src));
cheat_assert(MSTRO_OK == mstro_cdo_declaration_seal(cdo_src)); cheat_assert(MSTRO_OK == mstro_cdo_declaration_seal(cdo_src));
...@@ -130,7 +136,7 @@ CHEAT_TEST(layout_attribute_works, ...@@ -130,7 +136,7 @@ CHEAT_TEST(layout_attribute_works,
cheat_assert(MSTRO_OK == mstro_cdo_attribute_set(cdo_dst, cheat_assert(MSTRO_OK == mstro_cdo_attribute_set(cdo_dst,
MSTRO_ATTR_CORE_CDO_LAYOUT_NDIMS, &ndims)); MSTRO_ATTR_CORE_CDO_LAYOUT_NDIMS, &ndims));
cheat_assert(MSTRO_OK == mstro_cdo_attribute_set(cdo_dst, cheat_assert(MSTRO_OK == mstro_cdo_attribute_set(cdo_dst,
MSTRO_ATTR_CORE_CDO_LAYOUT_DIMS_SIZE, dimsz)); MSTRO_ATTR_CORE_CDO_LAYOUT_DIMS_SIZE, dimsz_dst));
cheat_assert(MSTRO_OK == mstro_cdo_attribute_set(cdo_dst, cheat_assert(MSTRO_OK == mstro_cdo_attribute_set(cdo_dst,
MSTRO_ATTR_CORE_CDO_LAYOUT_ORDER, &patt_dst)); MSTRO_ATTR_CORE_CDO_LAYOUT_ORDER, &patt_dst));
cheat_assert(MSTRO_OK == mstro_cdo_declaration_seal(cdo_dst)); cheat_assert(MSTRO_OK == mstro_cdo_declaration_seal(cdo_dst));
...@@ -153,7 +159,7 @@ CHEAT_TEST(layout_attribute_works, ...@@ -153,7 +159,7 @@ CHEAT_TEST(layout_attribute_works,
cheat_assert(MSTRO_OK == mstro_cdo_attribute_set(cdo_unpooled, cheat_assert(MSTRO_OK == mstro_cdo_attribute_set(cdo_unpooled,
MSTRO_ATTR_CORE_CDO_LAYOUT_NDIMS, &ndims)); MSTRO_ATTR_CORE_CDO_LAYOUT_NDIMS, &ndims));
cheat_assert(MSTRO_OK == mstro_cdo_attribute_set(cdo_unpooled, cheat_assert(MSTRO_OK == mstro_cdo_attribute_set(cdo_unpooled,
MSTRO_ATTR_CORE_CDO_LAYOUT_DIMS_SIZE, dimsz)); MSTRO_ATTR_CORE_CDO_LAYOUT_DIMS_SIZE, dimsz_dst));
cheat_assert(MSTRO_OK == mstro_cdo_attribute_set(cdo_unpooled, cheat_assert(MSTRO_OK == mstro_cdo_attribute_set(cdo_unpooled,
MSTRO_ATTR_CORE_CDO_LAYOUT_ORDER, &patt_dst)); MSTRO_ATTR_CORE_CDO_LAYOUT_ORDER, &patt_dst));
cheat_assert(MSTRO_OK == mstro_cdo_declaration_seal(cdo_unpooled)); cheat_assert(MSTRO_OK == mstro_cdo_declaration_seal(cdo_unpooled));
...@@ -185,7 +191,7 @@ CHEAT_TEST(layout_attribute_works, ...@@ -185,7 +191,7 @@ CHEAT_TEST(layout_attribute_works,
cheat_assert(MSTRO_OK == mstro_cdo_attribute_set(cdo_unpooled_t, cheat_assert(MSTRO_OK == mstro_cdo_attribute_set(cdo_unpooled_t,
MSTRO_ATTR_CORE_CDO_LAYOUT_NDIMS, &ndims)); MSTRO_ATTR_CORE_CDO_LAYOUT_NDIMS, &ndims));
cheat_assert(MSTRO_OK == mstro_cdo_attribute_set(cdo_unpooled_t, cheat_assert(MSTRO_OK == mstro_cdo_attribute_set(cdo_unpooled_t,
MSTRO_ATTR_CORE_CDO_LAYOUT_DIMS_SIZE, dimsz)); MSTRO_ATTR_CORE_CDO_LAYOUT_DIMS_SIZE, dimsz_src));
cheat_assert(MSTRO_OK == mstro_cdo_attribute_set(cdo_unpooled_t, cheat_assert(MSTRO_OK == mstro_cdo_attribute_set(cdo_unpooled_t,
MSTRO_ATTR_CORE_CDO_LAYOUT_ORDER, &patt_src)); MSTRO_ATTR_CORE_CDO_LAYOUT_ORDER, &patt_src));
cheat_assert(MSTRO_OK == mstro_cdo_declaration_seal(cdo_unpooled_t)); cheat_assert(MSTRO_OK == mstro_cdo_declaration_seal(cdo_unpooled_t));
......
...@@ -198,12 +198,12 @@ mstro_transform_transpose( ...@@ -198,12 +198,12 @@ mstro_transform_transpose(
int64_t src_d1 = ((const int64_t*)src_dimsz)[1]; int64_t src_d1 = ((const int64_t*)src_dimsz)[1];
int64_t dst_d0 = ((const int64_t*)dst_dimsz)[0]; int64_t dst_d0 = ((const int64_t*)dst_dimsz)[0];
int64_t dst_d1 = ((const int64_t*)dst_dimsz)[1]; int64_t dst_d1 = ((const int64_t*)dst_dimsz)[1];
/*
if(! (src_d0==src_d1 && src_d0==dst_d1 && dst_d0==dst_d1)) { if(! (src_d0==src_d1 && src_d0==dst_d1 && dst_d0==dst_d1)) {
DEBUG("Can only handle square matrices\n"); DEBUG("Can only handle square matrices\n");
return MSTRO_NOMATCH; return MSTRO_NOMATCH;
} }
*/
/* actual transposition */ /* actual transposition */
/* FIXME: should be using mamba API */ /* FIXME: should be using mamba API */
{ {
...@@ -221,7 +221,7 @@ mstro_transform_transpose( ...@@ -221,7 +221,7 @@ mstro_transform_transpose(
for (j=0; j<dst_d0; j++) for (j=0; j<dst_d0; j++)
for (k=0; k<eltsize; k++) for (k=0; k<eltsize; k++)
dst_ptr[i*dst_d0*eltsize + j*eltsize +k] dst_ptr[i*dst_d0*eltsize + j*eltsize +k]
= src_ptr[j*dst_d0*eltsize + i*eltsize+k]; = src_ptr[j*dst_d1*eltsize + i*eltsize+k];
} }
return MSTRO_OK; return MSTRO_OK;
......
0% Loading. Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment