diff --git a/Makefile b/Makefile index 48f7cd68ab446cf2bd2102f8b6f72307d4f19ad1..00f04649121b853c53b2bfc52e92c52e88763dee 100644 --- a/Makefile +++ b/Makefile @@ -15,7 +15,7 @@ TARGS=$(patsubst examples/%.c,generated/%.x,$(SRC)) all : $(TARGS) $(TARGS) : $(M4_SRC) armclang -march=armv8+sve $(patsubst generated/%.x,generated/%.c,$@) -o $@ - armie -msve-vector-bits=128 $@ + armie -msve-vector-bits=256 $@ .PHONY : m4 clean test diff --git a/README.md b/README.md index 6e2702491b8b107045f6fc4ef42bdea794bb1dc7..385afcbabd8b593d926250ad7d4c1a61c378317b 100644 --- a/README.md +++ b/README.md @@ -1,3 +1,73 @@ -# Sve code gen +## SVE code generator -Code generator for sve (MB2020 work package 7, task 1) \ No newline at end of file +--- + +### Overview +This simple code generator generates C code (vectorized with intrinsics or inline assembly) for simple equations. + +Four m4 macro functions are provided: + +- `svedef(name, datatype, size)` +This function defines a vector that we want to use in the equation. +`name` is the name of the array pointer that we wish to vectorize. +`datatype` is the datatype of the array elements. It can be one of `float`, `double`, `complex_float` or `complex_double`. +`size` is the name of the variable holding the size of the vector. +This function expands into an empty string. +- `disable_complex` +If the datatype of the vectors is either `complex_float` or `complex_double`, the generated code will by default use complex arithmetic instructions. This macro disables the complex arithmetic instructions and generates code with ordinary floating-point arithmetic instead. +This function expands into an empty string. +- `sveasmfor(equation)` +`equation` is the equation we want to vectorize. It can be of the following format `output = input1 <+|-|*|/> input2 [<+|-> input3]`, where `input1`, `input2` or `input3` are the `name` arguments to `svedef()`. Any operand that was not declared with `svedef()` is treated as a scalar.
This function expands to the actual C code, in which the vectorization is done via inline assembly. + +- `sveintrfor(equation)` +Same as the previous macro, but the code is generated with intrinsic functions. + +--- + +### Simple demo + +Let's take the zaxpy routine as an example: **y = a * x + y** + +Create a file named zaxpy with the following content: +``` +svedef(x, complex_double, n) +svedef(y, complex_double, n) +sveasmfor( y = a * x + y ) +``` + +Invoke the generator with the command `m4 <path-to-sve-code-gen>/sve.m4 zaxpy > zaxpy.c` +This will generate the following code in the file zaxpy.c. + +``` +/*#####################---sve-generated-code---###################################*/ + +__asm__ volatile +( + "ldr x0, %[n] \n\t" + "lsl x0, x0, #1 \n\t" + "mov x1, xzr \n\t" + "whilelo p0.d, x1, x0 \n\t" +"sveloop: \n\t" + "ld1rqd z0.d, p0/z, %[a] \n\t" + "ld1d z1.d, p0/z, [%[x], x1, lsl #3] \n\t" + "ld1d z2.d, p0/z, [%[y], x1, lsl #3] \n\t" + "fcmla z2.d, p0/m, z0.d, z1.d, 0 \n\t" + "fcmla z2.d, p0/m, z0.d, z1.d, 90 \n\t" + "st1d z2.d, p0, [%[y], x1, lsl #3] \n\t" + "uqincd x1 \n\t" + "whilelo p0.d, x1, x0 \n\t" + "b.any sveloop " +: // output operands +: // input operands +[a] "m" (a), +[x] "r" (x), +[y] "r" (y), +[y] "r" (y), +[n] "m" (n) +: // register clobber list +"memory","cc","x0", "z0","z1","z2","z3","z4","z5","z6","z7","z8","p0" +); + + +/*#####################---sve-generated-code---###################################*/ +``` diff --git a/examples/complex_double_addmul.c b/examples/complex_double_addmul.c index bec08e9998be0f803fe6828313a84f3ca7b145d3..81a8860fd0fa46f6d480c6e84dc69c5fcaf226f0 100644 --- a/examples/complex_double_addmul.c +++ b/examples/complex_double_addmul.c @@ -38,7 +38,8 @@ int main(int argc, char* argv[]) svedef(x, complex_double, n) svedef(z, complex_double, n) disable_complex -sveintrfor( c = x + y * z) + +sveasmfor( c = x + y * z) for (j = 0; j < n; j++) printf("%f %f \n", creal(c[j]), cimag(c[j])); diff --git a/examples/complex_double_muladd.c
b/examples/complex_double_muladd.c index dc5e571e668ccf08f7a6e1d9fce2e0c89ce7eb78..a9e5a0cd2669469a753417100199800b7b3ec867 100644 --- a/examples/complex_double_muladd.c +++ b/examples/complex_double_muladd.c @@ -37,8 +37,7 @@ int main(int argc, char* argv[]) svedef(x, complex_double, n) svedef(z, complex_double, n) -disable_complex -sveintrfor( c = x * y + z) +sveasmfor( c = x * y + z) for (j = 0; j < n; j++) printf("%f %f \n", creal(c[j]), cimag(c[j])); diff --git a/examples/complex_double_product.c b/examples/complex_double_product.c index f1221be77be9d4a10a6873e07d57fc2a06b8fe37..b000348a14cb7b83962a7891429fe6bebf66937c 100644 --- a/examples/complex_double_product.c +++ b/examples/complex_double_product.c @@ -32,8 +32,7 @@ int main(int argc, char* argv[]) svedef(x, complex_double, n) svedef(y, complex_double, n) -disable_complex -sveintrfor( c = x * y) +sveasmfor( c = x * y) for (j = 0; j < n; j++) printf("%f %f \n", creal(c[j]), cimag(c[j])); diff --git a/examples/complex_double_scal_product.c b/examples/complex_double_scal_product.c index 89ef7c2a8faa962865042d789e931254be463de2..94c8c7e9eefd50211303bbf55fa44ad1b869262a 100644 --- a/examples/complex_double_scal_product.c +++ b/examples/complex_double_scal_product.c @@ -33,8 +33,7 @@ int main(int argc, char* argv[]) svedef(x, complex_double, n) //vedef(y, complex_double, n) - disable_complex -sveintrfor( c = x * y) +sveasmfor( c = x * y) for (j = 0; j < n; j++) printf("%f %f \n", creal(c[j]), cimag(c[j])); diff --git a/examples/complex_double_sum.c b/examples/complex_double_sum.c index f41568ab635aa3c6ac78407dd33603d720cb52d9..53546487da1d9f9e009935db31b14ea5ba175b6f 100644 --- a/examples/complex_double_sum.c +++ b/examples/complex_double_sum.c @@ -32,8 +32,7 @@ int main(int argc, char* argv[]) svedef(x, complex_double, n) svedef(y, complex_double, n) -disable_complex -sveintrfor( c = x + y) +sveasmfor( c = x + y) for (j = 0; j < n; j++) printf("%f %f \n", creal(c[j]), cimag(c[j])); diff --git 
a/examples/complex_float_mulsub.c b/examples/complex_float_mulsub.c index 8c2aa67091cf7a96ccce35052c3f08f20e7c335f..182cc1455981461d727b5d70d0cdb26ad6368c7e 100644 --- a/examples/complex_float_mulsub.c +++ b/examples/complex_float_mulsub.c @@ -37,8 +37,7 @@ int main(int argc, char* argv[]) svedef(x, complex_float, n) svedef(z, complex_float, n) -disable_complex -sveintrfor( c = y * x - z) +sveasmfor( c = y * x - z) for (j = 0; j < n; j++) printf("%f %f \n", creal(c[j]), cimag(c[j])); diff --git a/examples/complex_float_scalar_sub.c b/examples/complex_float_scalar_sub.c index 8b7196c4e75aeb52ccc41ea918a943a70a89a965..38ee0d4b932e6d1a099660f1e223c8e81eca1266 100644 --- a/examples/complex_float_scalar_sub.c +++ b/examples/complex_float_scalar_sub.c @@ -36,8 +36,7 @@ int main(int argc, char* argv[]) printf("complex float sub"); svedef(y, complex_float, n) - disable_complex -sveintrfor( c = x - y) +sveasmfor( c = x - y) for (j = 0; j < n; j++) printf("%f %f \n", creal(c[j]), cimag(c[j])); diff --git a/examples/complex_float_sub.c b/examples/complex_float_sub.c index 87f9a1d5dc0a90c04c266d21ffbc44c3adacad68..d99e58e0bd294b9fe5032eb2ad4b77c5e479faa2 100644 --- a/examples/complex_float_sub.c +++ b/examples/complex_float_sub.c @@ -35,8 +35,7 @@ int main(int argc, char* argv[]) svedef(x, complex_float, n) svedef(y, complex_float, n) -disable_complex -sveintrfor( c = x - y) +sveasmfor( c = x - y) for (j = 0; j < n; j++) printf("%f %f \n", creal(c[j]), cimag(c[j])); diff --git a/examples/complex_float_submul.c b/examples/complex_float_submul.c index edaaf66ba3bf6744802658345265f0a37799ad6a..7ac714a46f32a4f349ab777351f9edfcc350efe5 100644 --- a/examples/complex_float_submul.c +++ b/examples/complex_float_submul.c @@ -37,8 +37,7 @@ int main(int argc, char* argv[]) svedef(x, complex_float, n) svedef(z, complex_float, n) -disable_complex -sveintrfor( c = y - x * z) +sveasmfor( c = y - x * z) for (j = 0; j < n; j++) printf("%f %f \n", creal(c[j]), cimag(c[j])); diff --git 
a/examples/double_sum.c b/examples/double_sum.c index 187d95afed5d3e0f9743104bab4c66b69bde28fa..3403225d7ddbac8d696bb65d4058b28342404d18 100644 --- a/examples/double_sum.c +++ b/examples/double_sum.c @@ -30,8 +30,7 @@ int main(int argc, char* argv[]) svedef(x, double, n) svedef(y, double, n) -disable_complex -sveintrfor(c = x + y) +sveasmfor(c = x + y) printf("c= %f %f %f %f %f %f\n", c[0], c[1], c[2], c[3], c[4], c[5]); //printf("%f %f %f %f %f %f\n", c[0], c[1], c[2], c[3], c[4], c[5]); diff --git a/examples/float_mul.c b/examples/float_mul.c index f0e94b260f08523f88a32c681f58c5d693fc777c..b56b37e779c78951d212bafa86f5a2efa14ea06d 100644 --- a/examples/float_mul.c +++ b/examples/float_mul.c @@ -29,7 +29,7 @@ int main(int argc, char* argv[]) svedef(x, float, n) svedef(y, float, n) disable_complex -sveintrfor(c = x * y) +sveasmfor(c = x * y) printf("float mul"); printf("c= %f %f %f %f %f %f\n", c[0], c[1], c[2], c[3], c[4], c[5]); diff --git a/sve.m4 b/sve.m4 index b7ebf3f93e191117bf45efc1cc0fc0b8977e7a09..1c587cfa8befafbfaca00caba6c31e778399cbeb 100644 --- a/sve.m4 +++ b/sve.m4 @@ -33,7 +33,6 @@ divert /*#####################---sve-generated-code---###################################*/ uint64_t counter = 0; - svbool_t indir(`datatype')_sve_intr_while_lower( counter, size ifelse(iscomplex, `yes', `* 2')) do { @@ -48,7 +47,6 @@ ifdef(`input3', `indir(`datatype')_sve_intr_load(input3, counter, vector3)', `indir(`datatype')_sve_intr_dup(input3, vector3)')') - ifelse(operation, `mul', datatype`_sve_intr_mul(`vector1', `vector2', output, `counter')', operation, `add', datatype`_sve_intr_add(`vector1', `vector2', output, `counter')', operation, `sub', datatype`_sve_intr_sub(`vector1', `vector2', output, `counter')', @@ -63,10 +61,6 @@ indir(`datatype')_sve_intr_inc(counter) } while (indir(`datatype')_sve_intr_any(pg)); - - - - /*#####################---sve-generated-code---###################################*/ ')dnl dnl @@ -97,6 +91,7 @@ dnl `define(`complex_equation', 
indir(input2`sve', `complex'))') divert /*#####################---sve-generated-code---###################################*/ + ifdef(`disable_complex_instructions', `ifdef(input1`sve',`', `ifelse( @@ -106,8 +101,8 @@ double input1`imag' = cimag(input1); ', datatype, `complex_float', `float input1`real' = creal(input1); float input1`imag' = cimag(input1); -')')') - +')')')dnl +dnl ifdef(`disable_complex_instructions', `ifdef(input2`sve',`', `ifelse( @@ -117,8 +112,8 @@ double input2`imag' = cimag(input2); ', datatype, `complex_float', `float input2`real' = creal(input2); float input2`imag' = cimag(input2); -')')') - +')')')dnl +dnl ifdef(`input3', `ifdef(`disable_complex_instructions', `ifdef(input3`sve',`', @@ -129,8 +124,7 @@ double input3`imag' = cimag(input3); ', datatype, `complex_float', `float input3`real' = creal(input3); float input3`imag' = cimag(input3); -')')')') - +')')')')dnl __asm__ volatile ( "ldr x0, %[size] \n\t" @@ -200,11 +194,14 @@ ifdef(input2`sve', `[input2] "m" (input2),')') ifdef(`input3', `ifdef(input3`sve', -`[input3] "r" (input3),', +`[input3] "r" (input3), +', `ifdef(`disable_complex_instructions', `[input3`real'] "m" (input3`real'), -[input3`imag'] "m" (input3`imag'),', -`[input3] "m" (input3),')')') +[input3`imag'] "m" (input3`imag'), +', +`[input3] "m" (input3), +')')')dnl [output] "r" (output), [size] "m" (size) : // register clobber list