Skip to content
Snippets Groups Projects
Commit 6813c5b4 authored by Bine Brank's avatar Bine Brank
Browse files

modified readme

parent bbaafa19
Branches
Tags
No related merge requests found
...@@ -15,7 +15,7 @@ TARGS=$(patsubst examples/%.c,generated/%.x,$(SRC)) ...@@ -15,7 +15,7 @@ TARGS=$(patsubst examples/%.c,generated/%.x,$(SRC))
all : $(TARGS) all : $(TARGS)
$(TARGS) : $(M4_SRC) $(TARGS) : $(M4_SRC)
armclang -march=armv8+sve $(patsubst generated/%.x,generated/%.c,$@) -o $@ armclang -march=armv8+sve $(patsubst generated/%.x,generated/%.c,$@) -o $@
armie -msve-vector-bits=128 $@ armie -msve-vector-bits=256 $@
.PHONY : m4 clean test .PHONY : m4 clean test
......
# Sve code gen ## SVE code generator
Code generator for sve (MB2020 work package 7, task 1) ---
\ No newline at end of file
### Overview
This simple code generator generates C code (vectorized with intrinsics or inline assembly) for simple equations.
Four m4 macro functions are provided:
- `svedef(name, datatype, size)`
This function defines the vector that we want to use in the equation.
`name` is the name of the array pointer, which we wish to vectorize.
`datatype` is the datatype of the array elements. It can be one of `float`, `double`, `complex_float` or `complex_double`.
`size` is the name of the variable holding the size of the vector.
This function expands into an empty string.
- `disable_complex`
If the datatype of the vectors is either `complex_float` or `complex_double`, the generated code will by default use complex arithmetic instructions. This macro disables the complex arithmetic instructions and generates code with ordinary floating-point arithmetic instead.
This macro expands into an empty string.
- `sveasmfor(equation)`
`equation` is the equation we want to vectorize. It must have the format `output = input1 <+|-|*|/> input2 [<+|-> input3]`, where `input1`, `input2` and `input3` are names that were passed as the `name` argument to `svedef()`. Any operand that was not defined with `svedef()` is treated as a scalar. This function expands to the actual C code, where vectorization is done via inline assembly.
- `sveintrfor(equation)`
Same as `sveasmfor(equation)`, but the code is generated with intrinsic functions instead of inline assembly.
---
### Simple demo
Let's take the zaxpy routine as an example: **y = a * x + y**
Create a file named zaxpy with the following content:
```
svedef(x, complex_double, n)
svedef(y, complex_double, n)
sveasmfor( y = a * x + y )
```
Invoke the generator with the command `m4 <path-to-sve-code-gen>/sve.m4 zaxpy > zaxpy.c`
This will generate the following code in the file zaxpy.c.
```
/*#####################---sve-generated-code---###################################*/
__asm__ volatile
(
"ldr x0, %[n] \n\t"
"lsl x0, x0, #1 \n\t"
"mov x1, xzr \n\t"
"whilelo p0.d, x1, x0 \n\t"
"sveloop: \n\t"
"ld1rqd z0.d, p0/z, %[a] \n\t"
"ld1d z1.d, p0/z, [%[x], x1, lsl #3] \n\t"
"ld1d z2.d, p0/z, [%[y], x1, lsl #3] \n\t"
"fcmla z2.d, p0/m, z0.d, z1.d, 0 \n\t"
"fcmla z2.d, p0/m, z0.d, z1.d, 90 \n\t"
"st1d z2.d, p0, [%[y], x1, lsl #3] \n\t"
"uqincd x1 \n\t"
"whilelo p0.d, x1, x0 \n\t"
"b.any sveloop "
: // output operands
: // input operands
[a] "m" (a),
[x] "r" (x),
[y] "r" (y),
[y] "r" (y),
[n] "m" (n)
: // register clobber list
"memory","cc","x0", "z0","z1","z2","z3","z4","z5","z6","z7","z8","p0"
);
/*#####################---sve-generated-code---###################################*/
```
...@@ -38,7 +38,8 @@ int main(int argc, char* argv[]) ...@@ -38,7 +38,8 @@ int main(int argc, char* argv[])
svedef(x, complex_double, n) svedef(x, complex_double, n)
svedef(z, complex_double, n) svedef(z, complex_double, n)
disable_complex disable_complex
sveintrfor( c = x + y * z)
sveasmfor( c = x + y * z)
for (j = 0; j < n; j++) for (j = 0; j < n; j++)
printf("%f %f \n", creal(c[j]), cimag(c[j])); printf("%f %f \n", creal(c[j]), cimag(c[j]));
......
...@@ -37,8 +37,7 @@ int main(int argc, char* argv[]) ...@@ -37,8 +37,7 @@ int main(int argc, char* argv[])
svedef(x, complex_double, n) svedef(x, complex_double, n)
svedef(z, complex_double, n) svedef(z, complex_double, n)
disable_complex sveasmfor( c = x * y + z)
sveintrfor( c = x * y + z)
for (j = 0; j < n; j++) for (j = 0; j < n; j++)
printf("%f %f \n", creal(c[j]), cimag(c[j])); printf("%f %f \n", creal(c[j]), cimag(c[j]));
......
...@@ -32,8 +32,7 @@ int main(int argc, char* argv[]) ...@@ -32,8 +32,7 @@ int main(int argc, char* argv[])
svedef(x, complex_double, n) svedef(x, complex_double, n)
svedef(y, complex_double, n) svedef(y, complex_double, n)
disable_complex sveasmfor( c = x * y)
sveintrfor( c = x * y)
for (j = 0; j < n; j++) for (j = 0; j < n; j++)
printf("%f %f \n", creal(c[j]), cimag(c[j])); printf("%f %f \n", creal(c[j]), cimag(c[j]));
......
...@@ -33,8 +33,7 @@ int main(int argc, char* argv[]) ...@@ -33,8 +33,7 @@ int main(int argc, char* argv[])
svedef(x, complex_double, n) svedef(x, complex_double, n)
//vedef(y, complex_double, n) //vedef(y, complex_double, n)
disable_complex sveasmfor( c = x * y)
sveintrfor( c = x * y)
for (j = 0; j < n; j++) for (j = 0; j < n; j++)
printf("%f %f \n", creal(c[j]), cimag(c[j])); printf("%f %f \n", creal(c[j]), cimag(c[j]));
......
...@@ -32,8 +32,7 @@ int main(int argc, char* argv[]) ...@@ -32,8 +32,7 @@ int main(int argc, char* argv[])
svedef(x, complex_double, n) svedef(x, complex_double, n)
svedef(y, complex_double, n) svedef(y, complex_double, n)
disable_complex sveasmfor( c = x + y)
sveintrfor( c = x + y)
for (j = 0; j < n; j++) for (j = 0; j < n; j++)
printf("%f %f \n", creal(c[j]), cimag(c[j])); printf("%f %f \n", creal(c[j]), cimag(c[j]));
......
...@@ -37,8 +37,7 @@ int main(int argc, char* argv[]) ...@@ -37,8 +37,7 @@ int main(int argc, char* argv[])
svedef(x, complex_float, n) svedef(x, complex_float, n)
svedef(z, complex_float, n) svedef(z, complex_float, n)
disable_complex sveasmfor( c = y * x - z)
sveintrfor( c = y * x - z)
for (j = 0; j < n; j++) for (j = 0; j < n; j++)
printf("%f %f \n", creal(c[j]), cimag(c[j])); printf("%f %f \n", creal(c[j]), cimag(c[j]));
......
...@@ -36,8 +36,7 @@ int main(int argc, char* argv[]) ...@@ -36,8 +36,7 @@ int main(int argc, char* argv[])
printf("complex float sub"); printf("complex float sub");
svedef(y, complex_float, n) svedef(y, complex_float, n)
disable_complex sveasmfor( c = x - y)
sveintrfor( c = x - y)
for (j = 0; j < n; j++) for (j = 0; j < n; j++)
printf("%f %f \n", creal(c[j]), cimag(c[j])); printf("%f %f \n", creal(c[j]), cimag(c[j]));
......
...@@ -35,8 +35,7 @@ int main(int argc, char* argv[]) ...@@ -35,8 +35,7 @@ int main(int argc, char* argv[])
svedef(x, complex_float, n) svedef(x, complex_float, n)
svedef(y, complex_float, n) svedef(y, complex_float, n)
disable_complex sveasmfor( c = x - y)
sveintrfor( c = x - y)
for (j = 0; j < n; j++) for (j = 0; j < n; j++)
printf("%f %f \n", creal(c[j]), cimag(c[j])); printf("%f %f \n", creal(c[j]), cimag(c[j]));
......
...@@ -37,8 +37,7 @@ int main(int argc, char* argv[]) ...@@ -37,8 +37,7 @@ int main(int argc, char* argv[])
svedef(x, complex_float, n) svedef(x, complex_float, n)
svedef(z, complex_float, n) svedef(z, complex_float, n)
disable_complex sveasmfor( c = y - x * z)
sveintrfor( c = y - x * z)
for (j = 0; j < n; j++) for (j = 0; j < n; j++)
printf("%f %f \n", creal(c[j]), cimag(c[j])); printf("%f %f \n", creal(c[j]), cimag(c[j]));
......
...@@ -30,8 +30,7 @@ int main(int argc, char* argv[]) ...@@ -30,8 +30,7 @@ int main(int argc, char* argv[])
svedef(x, double, n) svedef(x, double, n)
svedef(y, double, n) svedef(y, double, n)
disable_complex sveasmfor(c = x + y)
sveintrfor(c = x + y)
printf("c= %f %f %f %f %f %f\n", c[0], c[1], c[2], c[3], c[4], c[5]); printf("c= %f %f %f %f %f %f\n", c[0], c[1], c[2], c[3], c[4], c[5]);
//printf("%f %f %f %f %f %f\n", c[0], c[1], c[2], c[3], c[4], c[5]); //printf("%f %f %f %f %f %f\n", c[0], c[1], c[2], c[3], c[4], c[5]);
......
...@@ -29,7 +29,7 @@ int main(int argc, char* argv[]) ...@@ -29,7 +29,7 @@ int main(int argc, char* argv[])
svedef(x, float, n) svedef(x, float, n)
svedef(y, float, n) svedef(y, float, n)
disable_complex disable_complex
sveintrfor(c = x * y) sveasmfor(c = x * y)
printf("float mul"); printf("float mul");
printf("c= %f %f %f %f %f %f\n", c[0], c[1], c[2], c[3], c[4], c[5]); printf("c= %f %f %f %f %f %f\n", c[0], c[1], c[2], c[3], c[4], c[5]);
......
...@@ -33,7 +33,6 @@ divert ...@@ -33,7 +33,6 @@ divert
/*#####################---sve-generated-code---###################################*/ /*#####################---sve-generated-code---###################################*/
uint64_t counter = 0; uint64_t counter = 0;
svbool_t indir(`datatype')_sve_intr_while_lower( counter, size ifelse(iscomplex, `yes', `* 2')) svbool_t indir(`datatype')_sve_intr_while_lower( counter, size ifelse(iscomplex, `yes', `* 2'))
do do
{ {
...@@ -48,7 +47,6 @@ ifdef(`input3', ...@@ -48,7 +47,6 @@ ifdef(`input3',
`indir(`datatype')_sve_intr_load(input3, counter, vector3)', `indir(`datatype')_sve_intr_load(input3, counter, vector3)',
`indir(`datatype')_sve_intr_dup(input3, vector3)')') `indir(`datatype')_sve_intr_dup(input3, vector3)')')
ifelse(operation, `mul', datatype`_sve_intr_mul(`vector1', `vector2', output, `counter')', ifelse(operation, `mul', datatype`_sve_intr_mul(`vector1', `vector2', output, `counter')',
operation, `add', datatype`_sve_intr_add(`vector1', `vector2', output, `counter')', operation, `add', datatype`_sve_intr_add(`vector1', `vector2', output, `counter')',
operation, `sub', datatype`_sve_intr_sub(`vector1', `vector2', output, `counter')', operation, `sub', datatype`_sve_intr_sub(`vector1', `vector2', output, `counter')',
...@@ -63,10 +61,6 @@ indir(`datatype')_sve_intr_inc(counter) ...@@ -63,10 +61,6 @@ indir(`datatype')_sve_intr_inc(counter)
} }
while (indir(`datatype')_sve_intr_any(pg)); while (indir(`datatype')_sve_intr_any(pg));
/*#####################---sve-generated-code---###################################*/ /*#####################---sve-generated-code---###################################*/
')dnl ')dnl
dnl dnl
...@@ -97,6 +91,7 @@ dnl `define(`complex_equation', indir(input2`sve', `complex'))') ...@@ -97,6 +91,7 @@ dnl `define(`complex_equation', indir(input2`sve', `complex'))')
divert divert
/*#####################---sve-generated-code---###################################*/ /*#####################---sve-generated-code---###################################*/
ifdef(`disable_complex_instructions', ifdef(`disable_complex_instructions',
`ifdef(input1`sve',`', `ifdef(input1`sve',`',
`ifelse( `ifelse(
...@@ -106,8 +101,8 @@ double input1`imag' = cimag(input1); ...@@ -106,8 +101,8 @@ double input1`imag' = cimag(input1);
', datatype, `complex_float', ', datatype, `complex_float',
`float input1`real' = creal(input1); `float input1`real' = creal(input1);
float input1`imag' = cimag(input1); float input1`imag' = cimag(input1);
')')') ')')')dnl
dnl
ifdef(`disable_complex_instructions', ifdef(`disable_complex_instructions',
`ifdef(input2`sve',`', `ifdef(input2`sve',`',
`ifelse( `ifelse(
...@@ -117,8 +112,8 @@ double input2`imag' = cimag(input2); ...@@ -117,8 +112,8 @@ double input2`imag' = cimag(input2);
', datatype, `complex_float', ', datatype, `complex_float',
`float input2`real' = creal(input2); `float input2`real' = creal(input2);
float input2`imag' = cimag(input2); float input2`imag' = cimag(input2);
')')') ')')')dnl
dnl
ifdef(`input3', ifdef(`input3',
`ifdef(`disable_complex_instructions', `ifdef(`disable_complex_instructions',
`ifdef(input3`sve',`', `ifdef(input3`sve',`',
...@@ -129,8 +124,7 @@ double input3`imag' = cimag(input3); ...@@ -129,8 +124,7 @@ double input3`imag' = cimag(input3);
', datatype, `complex_float', ', datatype, `complex_float',
`float input3`real' = creal(input3); `float input3`real' = creal(input3);
float input3`imag' = cimag(input3); float input3`imag' = cimag(input3);
')')')') ')')')')dnl
__asm__ volatile __asm__ volatile
( (
"ldr x0, %[size] \n\t" "ldr x0, %[size] \n\t"
...@@ -200,11 +194,14 @@ ifdef(input2`sve', ...@@ -200,11 +194,14 @@ ifdef(input2`sve',
`[input2] "m" (input2),')') `[input2] "m" (input2),')')
ifdef(`input3', ifdef(`input3',
`ifdef(input3`sve', `ifdef(input3`sve',
`[input3] "r" (input3),', `[input3] "r" (input3),
',
`ifdef(`disable_complex_instructions', `ifdef(`disable_complex_instructions',
`[input3`real'] "m" (input3`real'), `[input3`real'] "m" (input3`real'),
[input3`imag'] "m" (input3`imag'),', [input3`imag'] "m" (input3`imag'),
`[input3] "m" (input3),')')') ',
`[input3] "m" (input3),
')')')dnl
[output] "r" (output), [output] "r" (output),
[size] "m" (size) [size] "m" (size)
: // register clobber list : // register clobber list
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please to comment