From ab8c87801bf0f2b7e6a1de93848c0467a8efa265 Mon Sep 17 00:00:00 2001 From: Bine Brank <b.brank@fz-juelich.de> Date: Fri, 29 Jan 2021 12:14:17 +0100 Subject: [PATCH] add outer-loop vectorisation --- linear-algebra/blas/gemver/gemver.c | 69 ++++++++++++++++++++++- linear-algebra/blas/gesummv/gesummv.c | 31 +++++++++- linear-algebra/blas/symm/symm.c | 48 ++++++++++++++-- linear-algebra/blas/trmm/trmm.c | 28 +++++++++ linear-algebra/kernels/2mm/2mm.c | 47 +++++++++++++++ linear-algebra/kernels/3mm/3mm.c | 61 ++++++++++++++++++++ linear-algebra/kernels/doitgen/doitgen.c | 32 ++++++++++- linear-algebra/kernels/mvt/mvt.c | 34 +++++++++++ utilities/.run-all.pl.swp | Bin 0 -> 12288 bytes utilities/makefile-gen.pl | 2 +- utilities/run-all.pl | 2 - 11 files changed, 343 insertions(+), 11 deletions(-) create mode 100644 utilities/.run-all.pl.swp diff --git a/linear-algebra/blas/gemver/gemver.c b/linear-algebra/blas/gemver/gemver.c index 65620c0..b930ef2 100644 --- a/linear-algebra/blas/gemver/gemver.c +++ b/linear-algebra/blas/gemver/gemver.c @@ -104,7 +104,73 @@ void kernel_gemver(int n, DATA_TYPE POLYBENCH_1D(z,N,n)) { int i, j; -#ifdef USEINTRINSICS + +#ifdef VECTORIZE_OUTER +#ifdef __ARM_FEATURE_SVE +#pragma scop + int slice = svcntd(); + svfloat64_t betav = svdup_f64(beta); + svfloat64_t alphav = svdup_f64(alpha); + svuint64_t indv = svindex_u64(0, _PB_N); + for (i = 0; i < _PB_N; i++){ + j = 0; + svfloat64_t u1v = svdup_f64(u1[i]); + svfloat64_t u2v = svdup_f64(u2[i]); + svbool_t pg = svwhilelt_b64(j, _PB_N); + do { + svfloat64_t a = svld1(pg, &A[i][j]); + svfloat64_t v1v = svld1(pg, &v1[j]); + svfloat64_t v2v = svld1(pg, &v2[j]); + svfloat64_t temp = svmul_z(pg, u2v, v2v); + a = svmla_z(pg, a, v1v, u1v); + svst1_f64(pg, &A[i][j], svadd_z(pg, a, temp)); + j += svcntd(); + pg = svwhilelt_b64(j, _PB_N); + } while (svptest_any(svptrue_b64(), pg)); + } + + for (i = 0; i < _PB_N; i += slice) { + svbool_t pg = svwhilelt_b64(i, _PB_N); + svfloat64_t xv = svld1(pg, &x[i]); + for (j = 0; j < _PB_N; j++) { + svfloat64_t av = svld1(pg, &A[j][i]); + svfloat64_t yv = svdup_f64(y[j]); + svfloat64_t temp = svmul_z(pg, betav, av); + xv = svmla_z(pg, xv, temp, yv); + } + svst1_f64(pg, &x[i], xv); + } + + i = 0; + svbool_t pg = svwhilelt_b64(i, _PB_N); + do { + svfloat64_t xv = svld1(pg, &x[i]); + svfloat64_t zv = svld1(pg, &z[i]); + svfloat64_t temp = svadd_z(pg, xv, zv); + svst1_f64(pg, &x[i], temp); + i += svcntd(); + pg = svwhilelt_b64(i, _PB_N); + } while (svptest_any(svptrue_b64(), pg)); + + for (i = 0; i < _PB_N; i += slice) { + svbool_t pg = svwhilelt_b64(i, _PB_N); + svfloat64_t wv = svld1(pg, &w[i]); + for (j = 0; j < _PB_N; j++) { + svfloat64_t av = svld1_gather_index(pg, &A[i][j], indv); + svfloat64_t xv = svdup_f64(x[j]); + svfloat64_t temp = svmul_z(pg, alphav, av); + wv = svmla_z(pg, wv, temp, xv); + } + svst1_f64(pg, &w[i], wv); + } + +#pragma endscop +#else +#error -DVECTORIZE_OUTER used but no support for SVE +#endif + + +#elif defined(USEINTRINSICS) #ifdef __ARM_FEATURE_SVE #pragma scop for (i = 0; i < _PB_N; i++){ @@ -166,7 +232,6 @@ void kernel_gemver(int n, } while (svptest_any(svptrue_b64(), pg)); } #pragma endscop -#pragma endscop #else #error -DUSEINTRISICS used but no support for SVE #endif diff --git a/linear-algebra/blas/gesummv/gesummv.c b/linear-algebra/blas/gesummv/gesummv.c index 54a2b60..9a04787 100644 --- a/linear-algebra/blas/gesummv/gesummv.c +++ b/linear-algebra/blas/gesummv/gesummv.c @@ -88,7 +88,36 @@ void kernel_gesummv(int n, { int i, j; -#ifdef USEINTRINSICS +#ifdef 
VECTORIZE_OUTER +#ifdef __ARM_FEATURE_SVE +#pragma scop + int slice = svcntd(); + svfloat64_t betav = svdup_f64(beta); + svfloat64_t alphav = svdup_f64(alpha); + svuint64_t indv = svindex_u64(0, _PB_N); + for (i = 0; i < _PB_N; i += slice) + { + svbool_t pg = svwhilelt_b64(i, _PB_N); + svfloat64_t tmpv = svdup_f64(0.0); + svfloat64_t yv = svdup_f64(0.0); + for (j = 0; j < _PB_N; j++) + { + svfloat64_t xv = svdup_f64(x[j]); + svfloat64_t av = svld1_gather_index(pg, &A[i][j], indv); + svfloat64_t bv = svld1_gather_index(pg, &B[i][j], indv); + tmpv = svmla_z(pg, tmpv, av, xv); + yv = svmla_z(pg, yv, bv, xv); + } + svfloat64_t tv = svmul_z(pg, betav, yv); + yv = svmla_z(pg, tv, alphav, tmpv); + svst1_f64(pg, &y[i], yv); + } +#pragma endscop +#else +#error -DVECTORIZE_OUTER used but no support for SVE +#endif + +#elif defined(USEINTRINSICS) #ifdef __ARM_FEATURE_SVE #pragma scop for (i = 0; i < _PB_N; i++) diff --git a/linear-algebra/blas/symm/symm.c b/linear-algebra/blas/symm/symm.c index 956aeeb..4744294 100644 --- a/linear-algebra/blas/symm/symm.c +++ b/linear-algebra/blas/symm/symm.c @@ -20,6 +20,9 @@ #include <sys/prctl.h> #endif +#ifdef __ARM_FEATURE_SVE +#include <arm_sve.h> +#endif /* Include benchmark-specific header. */ #include "symm.h" @@ -87,14 +90,50 @@ void kernel_symm(int m, int n, int i, j, k; DATA_TYPE temp2; -//BLAS PARAMS -//SIDE = 'L' -//UPLO = 'L' +// BLAS PARAMS +// SIDE = 'L' +// UPLO = 'L' // => Form C := alpha*A*B + beta*C // A is MxM // B is MxN // C is MxN -//note that due to Fortran array layout, the code below more closely resembles upper triangular case in BLAS +// note that due to Fortran array layout, the code below more closely resembles upper triangular case in BLAS + +#if defined(VECTORIZE_OUTER) +#ifdef __ARM_FEATURE_SVE +#pragma scop + int slice = svcntd(); + svfloat64_t betav = svdup_f64(beta); + svfloat64_t alphav = svdup_f64(alpha); + svuint64_t indv = svindex_u64(0, _PB_N); + for (i = 0; i < _PB_M; i++) + for (j = 0; j < _PB_N; j += slice ) + { + svfloat64_t temp2v = svdup_f64(0.0); + svbool_t pg = svwhilelt_b64(j, _PB_N); + svfloat64_t bijv = svld1(pg, &B[i][j]); + for (k = 0; k < i; k++) { + svfloat64_t aikv = svdup_f64(A[i][k]); + svfloat64_t ckjv = svld1(pg, &C[k][j]); + svfloat64_t tempv = svmul_z(pg, alphav, bijv); + svst1_f64(pg, &C[k][j], svmla_z(pg, ckjv, tempv, aikv)); + + svfloat64_t bkjv = svld1(pg, &B[k][j]); + temp2v = svmla_z(pg, temp2v, bkjv, aikv); + } + svfloat64_t cijv = svld1(pg, &C[i][j]); + svfloat64_t bc = svmul_z(pg, betav, cijv); + svfloat64_t aiiv = svdup_f64(A[i][i]); + svfloat64_t aba = svmul_z(pg, alphav, svmul_z(pg, bijv, aiiv)); + svfloat64_t tempv = svadd_z(pg, bc, aba); + svst1_f64(pg, &C[i][j], svmla_z(pg, tempv, alphav, temp2v)); + } +#pragma endscop +#else +#error -DVECTORIZE_OUTER used but no support for SVE +#endif + +#else #pragma scop for (i = 0; i < _PB_M; i++) for (j = 0; j < _PB_N; j++ ) @@ -107,6 +146,7 @@ void kernel_symm(int m, int n, C[i][j] = beta * C[i][j] + alpha*B[i][j] * A[i][i] + alpha * temp2; } #pragma endscop +#endif } diff --git a/linear-algebra/blas/trmm/trmm.c b/linear-algebra/blas/trmm/trmm.c index dccab73..acbc236 100644 --- a/linear-algebra/blas/trmm/trmm.c +++ b/linear-algebra/blas/trmm/trmm.c @@ -20,6 +20,9 @@ #include <sys/prctl.h> #endif +#ifdef __ARM_FEATURE_SVE +#include <arm_sve.h> +#endif /* __ARM_FEATURE_SVE */ /* Include benchmark-specific header. */ #include "trmm.h" @@ -88,6 +91,30 @@ void kernel_trmm(int m, int n, // => Form B := alpha*A**T*B. 
 // A is MxM
 // B is MxN
+#ifdef VECTORIZE_OUTER
+#ifdef __ARM_FEATURE_SVE
+#pragma scop
+  int slice = svcntd();
+  svfloat64_t alphav = svdup_f64(alpha);
+  svuint64_t indv = svindex_u64(0, _PB_N);
+  for (i = 0; i < _PB_M; i++) {
+    for (j = 0; j < _PB_N; j += slice) {
+      svbool_t pg = svwhilelt_b64(j, _PB_N);
+      svfloat64_t bijv = svld1(pg, &B[i][j]);
+      for (k = i+1; k < _PB_M; k++) {
+        svfloat64_t akiv = svdup_f64(A[k][i]);
+        svfloat64_t bkjv = svld1(pg, &B[k][j]);
+        bijv = svmla_z(pg, bijv, akiv, bkjv);
+      }
+      bijv = svmul_z(pg, alphav, bijv);
+      svst1_f64(pg, &B[i][j], bijv);
+    }
+  }
+#pragma endscop
+#else
+#error -DVECTORIZE_OUTER used but no support for SVE
+#endif
+#else
 #pragma scop
   for (i = 0; i < _PB_M; i++)
      for (j = 0; j < _PB_N; j++) {
@@ -96,6 +123,7 @@ void kernel_trmm(int m, int n,
        B[i][j] = alpha * B[i][j];
      }
 #pragma endscop
+#endif
 }
diff --git a/linear-algebra/kernels/2mm/2mm.c b/linear-algebra/kernels/2mm/2mm.c
index 562c2f5..a0f5bd4 100644
--- a/linear-algebra/kernels/2mm/2mm.c
+++ b/linear-algebra/kernels/2mm/2mm.c
@@ -20,6 +20,9 @@
 #include <sys/prctl.h>
 #endif
 
+#ifdef __ARM_FEATURE_SVE
+#include <arm_sve.h>
+#endif /* __ARM_FEATURE_SVE */
 /* Include benchmark-specific header. */
 #include "2mm.h"
 
@@ -89,7 +92,50 @@ void kernel_2mm(int ni, int nj, int nk, int nl,
 	 DATA_TYPE POLYBENCH_2D(D,NI,NL,ni,nl))
 {
   int i, j, k;
 
+#ifdef VECTORIZE_OUTER
+#ifdef __ARM_FEATURE_SVE
+#pragma scop
+  int slice = svcntd();
+  svfloat64_t betav = svdup_f64(beta);
+  svfloat64_t alphav = svdup_f64(alpha);
+  /* D := alpha*A*B*C + beta*D */
+  for (i = 0; i < _PB_NI; i++) {
+    for (j = 0; j < _PB_NJ; j += slice)
+    {
+      svbool_t pg = svwhilelt_b64(j, _PB_NJ);
+      svfloat64_t tmpijv = svdup_f64(0.0);
+      //tmp[i][j] = SCALAR_VAL(0.0);
+      for (k = 0; k < _PB_NK; ++k) {
+        svfloat64_t aikv = svdup_f64(A[i][k]);
+        svfloat64_t bkjv = svld1(pg, &B[k][j]);
+        svfloat64_t tmp = svmul_z(pg, alphav, aikv);
+        tmpijv = svmla_z(pg, tmpijv, tmp, bkjv);
+      }
+      svst1_f64(pg, &tmp[i][j], tmpijv);
+    }
+  }
+  for (i = 0; i < _PB_NI; i++) {
+    for (j = 0; j < _PB_NL; j += slice)
+    {
+      svbool_t pg = svwhilelt_b64(j, _PB_NL);
+      svfloat64_t dijv = svld1(pg, &D[i][j]);
+      dijv = svmul_z(pg, dijv, betav);
+      for (k = 0; k < _PB_NJ; ++k) {
+        svfloat64_t tmpikv = svdup_f64(tmp[i][k]);
+        svfloat64_t ckjv = svld1(pg, &C[k][j]);
+        dijv = svmla_z(pg, dijv, tmpikv, ckjv);
+      }
+      svst1_f64(pg, &D[i][j], dijv);
+    }
+  }
+
+#pragma endscop
+#else
+#error -DVECTORIZE_OUTER used but no support for SVE
+#endif
+
+#else
 #pragma scop
   /* D := alpha*A*B*C + beta*D */
   for (i = 0; i < _PB_NI; i++)
@@ -107,6 +153,7 @@ void kernel_2mm(int ni, int nj, int nk, int nl,
 	D[i][j] += tmp[i][k] * C[k][j];
     }
 #pragma endscop
+#endif
 }
diff --git a/linear-algebra/kernels/3mm/3mm.c b/linear-algebra/kernels/3mm/3mm.c
index b339912..6b93c53 100644
--- a/linear-algebra/kernels/3mm/3mm.c
+++ b/linear-algebra/kernels/3mm/3mm.c
@@ -20,6 +20,9 @@
 #include <sys/prctl.h>
 #endif
 
+#ifdef __ARM_FEATURE_SVE
+#include <arm_sve.h>
+#endif /* __ARM_FEATURE_SVE */
 /* Include benchmark-specific header. 
*/ #include "3mm.h" @@ -85,7 +88,64 @@ void kernel_3mm(int ni, int nj, int nk, int nl, int nm, DATA_TYPE POLYBENCH_2D(G,NI,NL,ni,nl)) { int i, j, k; +#ifdef VECTORIZE_OUTER +#ifdef __ARM_FEATURE_SVE +#pragma scop + int slice = svcntd(); + /* E := A*B */ + for (i = 0; i < _PB_NI; i++) + { + for (j = 0; j < _PB_NJ; j += slice) + { + svbool_t pg = svwhilelt_b64(j, _PB_NJ); + svfloat64_t eijv = svdup_f64(0.0); + for (k = 0; k < _PB_NK; ++k) { + svfloat64_t aikv = svdup_f64(A[i][k]); + svfloat64_t bkjv = svld1(pg, &B[k][j]); + eijv = svmla_z(pg, eijv, aikv, bkjv); + } + svst1_f64(pg, &E[i][j], eijv); + } + } + /* F := C*D */ + for (i = 0; i < _PB_NJ; i++) + { + for (j = 0; j < _PB_NL; j += slice) + { + svbool_t pg = svwhilelt_b64(j, _PB_NL); + svfloat64_t fijv = svdup_f64(0.0); + for (k = 0; k < _PB_NM; ++k) + { + svfloat64_t cikv = svdup_f64(C[i][k]); + svfloat64_t dkjv = svld1(pg, &D[k][j]); + fijv = svmla_z(pg, fijv, cikv, dkjv); + } + svst1_f64(pg, &F[i][j], fijv); + } + } + /* G := E*F */ + for (i = 0; i < _PB_NI; i++) + { + for (j = 0; j < _PB_NL; j += slice) + { + svbool_t pg = svwhilelt_b64(j, _PB_NL); + svfloat64_t gijv = svdup_f64(0.0); + for (k = 0; k < _PB_NJ; ++k) + { + svfloat64_t eikv = svdup_f64(E[i][k]); + svfloat64_t fkjv = svld1(pg, &F[k][j]); + gijv = svmla_z(pg, gijv, eikv, fkjv); + } + svst1_f64(pg, &G[i][j], gijv); + } + } +#pragma endscop +#else +#error -DVECTORIZE_OUTER used but no support for SVE +#endif + +#else #pragma scop /* E := A*B */ for (i = 0; i < _PB_NI; i++) @@ -112,6 +172,7 @@ void kernel_3mm(int ni, int nj, int nk, int nl, int nm, G[i][j] += E[i][k] * F[k][j]; } #pragma endscop +#endif } diff --git a/linear-algebra/kernels/doitgen/doitgen.c b/linear-algebra/kernels/doitgen/doitgen.c index ed6fcdc..d3667d2 100644 --- a/linear-algebra/kernels/doitgen/doitgen.c +++ b/linear-algebra/kernels/doitgen/doitgen.c @@ -20,6 +20,9 @@ #include <sys/prctl.h> #endif +#ifdef __ARM_FEATURE_SVE +#include <arm_sve.h> +#endif /* __ARM_FEATURE_SVE */ /* Include benchmark-specific header. */ #include "doitgen.h" @@ -74,7 +77,33 @@ void kernel_doitgen(int nr, int nq, int np, DATA_TYPE POLYBENCH_1D(sum,NP,np)) { int r, q, p, s; - +#ifdef VECTORIZE_OUTER +#ifdef __ARM_FEATURE_SVE +#pragma scop + int slice = svcntd(); + for (r = 0; r < _PB_NR; r++) { + for (q = 0; q < _PB_NQ; q++) { + for (p = 0; p < _PB_NP; p += slice) { + svfloat64_t sumv = svdup_f64(0.0); + svbool_t pg = svwhilelt_b64(p, _PB_NP); + //sum[p] = SCALAR_VAL(0.0); + for (s = 0; s < _PB_NP; s++) { + svfloat64_t c4sp = svld1(pg, &C4[s][p]); + svfloat64_t arqsv = svdup_f64(A[r][q][s]); + sumv = svmla_z(pg, sumv, arqsv, c4sp); + } + svst1_f64(pg, &sum[p], sumv); + } + for (p = 0; p < _PB_NP; p++) + A[r][q][p] = sum[p]; + } + } +#pragma endscop +#else +#error -DVECTORIZE_OUTER used but no support for SVE +#endif + +#else #pragma scop for (r = 0; r < _PB_NR; r++) for (q = 0; q < _PB_NQ; q++) { @@ -87,6 +116,7 @@ void kernel_doitgen(int nr, int nq, int np, A[r][q][p] = sum[p]; } #pragma endscop +#endif } diff --git a/linear-algebra/kernels/mvt/mvt.c b/linear-algebra/kernels/mvt/mvt.c index 88fa97d..c8ac137 100644 --- a/linear-algebra/kernels/mvt/mvt.c +++ b/linear-algebra/kernels/mvt/mvt.c @@ -20,6 +20,9 @@ #include <sys/prctl.h> #endif +#ifdef __ARM_FEATURE_SVE +#include <arm_sve.h> +#endif /* __ARM_FEATURE_SVE */ /* Include benchmark-specific header. 
*/
 #include "mvt.h"
 
@@ -89,7 +92,37 @@ void kernel_mvt(int n,
 	 DATA_TYPE POLYBENCH_2D(A,N,N,n,n))
 {
   int i, j;
 
+#ifdef VECTORIZE_OUTER
+#ifdef __ARM_FEATURE_SVE
+#pragma scop
+  int slice = svcntd();
+  svuint64_t indv = svindex_u64(0, _PB_N);
+  for (i = 0; i < _PB_N; i += slice) {
+    svbool_t pg = svwhilelt_b64(i, _PB_N);
+    svfloat64_t xv = svld1(pg, &x1[i]);
+    for (j = 0; j < _PB_N; j++) {
+      svfloat64_t y1v = svdup_f64(y_1[j]);
+      svfloat64_t av = svld1_gather_index(pg, &A[i][j], indv);
+      xv = svmla_z(pg, xv, av, y1v);
+    }
+    svst1_f64(pg, &x1[i], xv);
+  }
+  for (i = 0; i < _PB_N; i += slice) {
+    svbool_t pg = svwhilelt_b64(i, _PB_N);
+    svfloat64_t xv = svld1(pg, &x2[i]);
+    for (j = 0; j < _PB_N; j++) {
+      svfloat64_t y2v = svdup_f64(y_2[j]);
+      svfloat64_t av = svld1(pg, &A[j][i]);
+      xv = svmla_z(pg, xv, av, y2v);
+    }
+    svst1_f64(pg, &x2[i], xv);
+  }
+#pragma endscop
+#else
+#error -DVECTORIZE_OUTER used but no support for SVE
+#endif
+#else
 #pragma scop
   for (i = 0; i < _PB_N; i++)
     for (j = 0; j < _PB_N; j++)
      x1[i] = x1[i] + A[i][j] * y_1[j];
@@ -98,6 +131,7 @@ void kernel_mvt(int n,
   for (i = 0; i < _PB_N; i++)
     for (j = 0; j < _PB_N; j++)
      x2[i] = x2[i] + A[j][i] * y_2[j];
 #pragma endscop
+#endif
 }
diff --git a/utilities/makefile-gen.pl b/utilities/makefile-gen.pl
index 152c3bf..dfea220 100644
--- a/utilities/makefile-gen.pl
+++ b/utilities/makefile-gen.pl
@@ -151,7 +151,7 @@ open FILE, '>'.$TARGET_DIR.'/config.mk';
 print FILE << "EOF";
 asdfCFLAGS=/scratch/gem5_utils/libm5.a -march=armv8-a+sve -O3 -ffp-contract=fast -static -lpthread -DUSEM5OPS -I/scratch/gem5_utils -DPOLYBENCH_NO_FLUSH_CACHE -DREPEATKERNEL
-CFLAGS=-march=armv8-a+sve -O3 -ffp-contract=fast -static -lpthread -DPOLYBENCH_DUMP_ARRAYS -DSVE_OPTIMIZED
+CFLAGS=-march=armv8-a+sve -O3 -ffp-contract=fast -static -lpthread
 NOVEC_GCC=-fno-tree-vectorize
 NOVEC_CLANG=-fno-vectorize
diff --git a/utilities/run-all.pl b/utilities/run-all.pl
index fe624e2..5991301 100644
--- a/utilities/run-all.pl
+++ b/utilities/run-all.pl
@@ -58,8 +58,6 @@ foreach $cat (@categories) {
 	my $targetDir = $target.'/'.$dir;
 	my $command = "cd $targetDir;
-        #make clean;
-        #make;
         rm -f $kernel-$COMPILE_COMMAND-$MEASURE.tmp;
         for i in {1..10}
         do
-- 
GitLab
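
Note on the pattern used above (an illustration added alongside the patch, not code taken from it): in the gemm-style kernels (symm, trmm, 2mm, 3mm, doitgen) the vectorised index is the second, fastest-moving array dimension, so each k-iteration only needs a broadcast of the scalar operand plus a contiguous predicated load and store. A minimal self-contained sketch of that idiom follows; the function name, the flat row-major layout and the int64_t loop indices are assumptions made for the example, and it presumes a compiler invoked with -march=armv8-a+sve as in the config.mk above.

#include <arm_sve.h>
#include <stdint.h>

/* C[i][j] = sum_k A[i][k] * B[k][j] for n x n row-major double matrices;
   one SVE vector of consecutive j values is produced per inner k loop. */
void matmul_outer_sve(int64_t n, const double *A, const double *B, double *C)
{
  int64_t slice = svcntd();                         /* doubles per SVE vector */
  for (int64_t i = 0; i < n; i++) {
    for (int64_t j = 0; j < n; j += slice) {
      svbool_t pg = svwhilelt_b64(j, n);            /* predicate masks the tail */
      svfloat64_t cv = svdup_f64(0.0);
      for (int64_t k = 0; k < n; k++) {
        svfloat64_t aik = svdup_f64(A[i * n + k]);  /* broadcast the scalar */
        svfloat64_t bkj = svld1(pg, &B[k * n + j]); /* contiguous row chunk */
        cv = svmla_z(pg, cv, aik, bkj);             /* cv += aik * bkj */
      }
      svst1_f64(pg, &C[i * n + j], cv);
    }
  }
}

Reusing the predicate from svwhilelt_b64 for both the loads and the final store is what keeps the loop safe when n is not a multiple of the vector length.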
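
Where the vectorised index is instead the slow-moving row dimension (the w update in gemver, gesummv, and the first mvt loop), consecutive lanes sit a full row apart in memory, which is why the patch builds a stride vector with svindex_u64 and loads through svld1_gather_index. A sketch of that variant under the same illustrative assumptions:

#include <arm_sve.h>
#include <stdint.h>

/* w[i] += alpha * A[i][j] * x[j], vectorised over the row index i.
   Lanes i, i+1, ... read column j, i.e. elements n doubles apart,
   hence the gather load through an index vector. */
void matvec_rows_sve(int64_t n, double alpha, const double *A,
                     const double *x, double *w)
{
  int64_t slice = svcntd();
  svfloat64_t alphav = svdup_f64(alpha);
  svuint64_t rowstep = svindex_u64(0, (uint64_t)n);  /* 0, n, 2n, ... */
  for (int64_t i = 0; i < n; i += slice) {
    svbool_t pg = svwhilelt_b64(i, n);
    svfloat64_t wv = svld1(pg, &w[i]);
    for (int64_t j = 0; j < n; j++) {
      svfloat64_t av = svld1_gather_index(pg, &A[i * n + j], rowstep);
      svfloat64_t xv = svdup_f64(x[j]);
      wv = svmla_z(pg, wv, svmul_z(pg, alphav, av), xv);
    }
    svst1_f64(pg, &w[i], wv);
  }
}

The index vector 0, n, 2n, ... is the same for every column, so it is hoisted out of both loops, as in the patch.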
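
gemver also uses a do/while form of the same predication for its element-wise x = x + z update: the index advances by svcntd() and the loop stops once svwhilelt_b64 yields an all-false predicate. A standalone sketch, again with illustrative names:

#include <arm_sve.h>
#include <stdint.h>

/* x[i] = x[i] + z[i] for 0 <= i < n, in the streaming do/while style. */
void vadd_sve(int64_t n, double *x, const double *z)
{
  int64_t i = 0;
  svbool_t pg = svwhilelt_b64(i, n);
  do {
    svfloat64_t xv = svld1(pg, &x[i]);
    svfloat64_t zv = svld1(pg, &z[i]);
    svst1_f64(pg, &x[i], svadd_z(pg, xv, zv));
    i += svcntd();
    pg = svwhilelt_b64(i, n);
  } while (svptest_any(svptrue_b64(), pg));  /* stop once no lane is active */
}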
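
One build-related observation: all of the new code paths are guarded by #ifdef VECTORIZE_OUTER, while the CFLAGS emitted by makefile-gen.pl in this patch do not define that macro, so the outer-loop variants remain compiled out by default. Enabling them would presumably take a config.mk line along these lines (the flag set is copied from the existing CFLAGS; only -DVECTORIZE_OUTER is the assumed addition):

CFLAGS=-march=armv8-a+sve -O3 -ffp-contract=fast -static -lpthread -DVECTORIZE_OUTER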