divert(`-1')
dnl ======================================================================
dnl  Single-precision (32-bit float) SVE building blocks.
dnl
dnl  Everything in this section is defined while output is diverted to -1,
dnl  so nothing here produces text by itself.
dnl ======================================================================
dnl
dnl floatsve(KEY)
dnl   Expands to the text stored under the name floatsve.KEY.  The text is
dnl   fetched with defn, so it is returned quoted.
dnl
dnl floatsve_set(KEY, VALUE)
dnl   Stores VALUE under the name floatsve.KEY.
dnl
dnl   The format string is quoted twice on purpose: format then returns a
dnl   quoted name, which keeps the leading word floatsve from being expanded
dnl   again when the result is rescanned.
define(`floatsve', `defn(format(``floatsve.%s'', `$1'))')
define(`floatsve_set', `define(format(``floatsve.%s'', `$1'), `$2')')
dnl
dnl ---- type names ------------------------------------------------------
floatsve_set(`vec', `svfloat32_t')dnl
floatsve_set(`intvec', `svuint32_t')dnl
floatsve_set(`scalpointer', `float')dnl
dnl
dnl ---- memory access, predicates and unary operations -------------------
floatsve_set(`load', `svld1_f32')dnl
floatsve_set(`gatherload', `svld1_gather_index')dnl
floatsve_set(`store', `svst1_f32')dnl
floatsve_set(`count', `svcntw()')dnl
floatsve_set(`index', `svindex_u32')dnl
floatsve_set(`true', `svptrue_b32()')dnl
floatsve_set(`while', `svwhilelt_b32')dnl
floatsve_set(`neg', `svneg_f32_z')dnl
floatsve_set(`abs', `svabs_f32_z')dnl
dnl
dnl ---- binary arithmetic (zeroing predication) ---------------------------
floatsve_set(`add', `svadd_f32_z')dnl
floatsve_set(`sub', `svsub_f32_z')dnl
floatsve_set(`mul', `svmul_f32_z')dnl
floatsve_set(`div', `svdiv_f32_z')dnl
dnl
dnl ======================================================================
dnl  Statement generators based on ACLE intrinsics.
dnl
dnl  All of them reference a predicate variable named pg that the
dnl  surrounding C code has to declare.  The arithmetic generators also
dnl  declare a local named res, so two of them cannot share one C scope.
dnl ======================================================================
dnl
dnl float_sve_intr_while_lower(FIRST, LIMIT)
dnl   Assigns svwhilelt_b32(FIRST, LIMIT) to pg.
define(`float_sve_intr_while_lower', `pg = svwhilelt_b32($1, $2);
')
dnl
dnl float_sve_intr_load(ARRAY, INDEX, VEC)
dnl   Declares vector VEC and loads it from &ARRAY[INDEX] under pg.
define(`float_sve_intr_load', `
    svfloat32_t $3 = svld1_f32(pg, (const float *) &$1[$2]);
')
dnl
dnl float_sve_intr_dup(SCALAR, VEC)
dnl   Declares vector VEC and broadcasts SCALAR into it under pg.
dnl   The predicated zeroing form svdup_f32_z is used because the
dnl   unpredicated svdup_f32 accepts the scalar only and would reject pg.
define(`float_sve_intr_dup', `
    svfloat32_t $2 = svdup_f32_z(pg, $1);
')
dnl
dnl float_sve_intr_add / _sub / _mul / _div (A, B, OUT, INDEX)
dnl   res = A op B under pg, then res is stored to &OUT[INDEX].
define(`float_sve_intr_add', `
    svfloat32_t res = svadd_f32_z(pg, $1, $2);
    svst1_f32(pg, (float *) &$3[$4], res);
')
define(`float_sve_intr_sub', `
    svfloat32_t res = svsub_f32_z(pg, $1, $2);
    svst1_f32(pg, (float *) &$3[$4], res);
')
define(`float_sve_intr_mul', `
    svfloat32_t res = svmul_f32_z(pg, $1, $2);
    svst1_f32(pg, (float *) &$3[$4], res);
')
define(`float_sve_intr_div', `
    svfloat32_t res = svdiv_f32_z(pg, $1, $2);
    svst1_f32(pg, (float *) &$3[$4], res);
')
dnl
dnl Fused generators (A, B, C, OUT, INDEX); res is stored to &OUT[INDEX].
dnl   muladd : svmla_f32_z(pg, C, A, B)    i.e. A * B + C
dnl   addmul : svmla_f32_z(pg, A, B, C)    i.e. A + B * C
dnl   submul : svmls_f32_z(pg, A, B, C)    i.e. A - B * C
dnl   mulsub : svnmls_f32_z(pg, C, A, B)   i.e. A * B - C
define(`float_sve_intr_muladd', `
    svfloat32_t res = svmla_f32_z(pg, $3, $1, $2);
    svst1_f32(pg, (float *) &$4[$5], res);
')
define(`float_sve_intr_addmul', `
    svfloat32_t res = svmla_f32_z(pg, $1, $2, $3);
    svst1_f32(pg, (float *) &$4[$5], res);
')
define(`float_sve_intr_submul', `
    svfloat32_t res = svmls_f32_z(pg, $1, $2, $3);
    svst1_f32(pg, (float *) &$4[$5], res);
')
define(`float_sve_intr_mulsub', `
    svfloat32_t res = svnmls_f32_z(pg, $3, $1, $2);
    svst1_f32(pg, (float *) &$4[$5], res);
')
dnl ======================================================================
dnl  Loop helpers for the intrinsic code path.
dnl ======================================================================
dnl
dnl float_sve_intr_inc(COUNTER)
dnl   Emits a statement that adds svcntw() to COUNTER.
define(`float_sve_intr_inc', `
    $1 += svcntw();')
dnl
dnl float_sve_intr_any(PRED)
dnl   Emits the expression svptest_any(svptrue_b32(), PRED); no trailing
dnl   semicolon is produced.
define(`float_sve_intr_any', ` svptest_any(svptrue_b32(), $1)')
dnl
dnl ======================================================================
dnl  Inline assembly code path.
dnl
dnl  Each macro expands to one or two C string literals, every one of them
dnl  terminated by the characters \n\t, meant to be pasted into an asm
dnl  statement.  Predicate register p0 is hard-wired in all fragments.
dnl  Arguments used after the letter z are vector register numbers;
dnl  arguments used inside %[...] are names of asm operands.
dnl ======================================================================
dnl
dnl float_sve_asm_while_lower(FIRST, LIMIT)
dnl   whilelo on p0.s with the two operands given.
define(`float_sve_asm_while_lower', `
        "whilelo p0.s, $1, $2 \n\t"')
dnl
dnl float_sve_asm_load(BASE, INDEX, ZREG)
dnl   ld1w into zZREG from [%[BASE], INDEX, lsl #2], zeroing predication.
define(`float_sve_asm_load', `
        "ld1w z$3.s, p0/z, [%[$1], $2, lsl #2] \n\t"')
dnl
dnl float_sve_asm_dup(OPERAND, ZREG)
dnl   ld1rw into zZREG from the asm operand %[OPERAND], zeroing predication.
define(`float_sve_asm_dup', `
        "ld1rw z$2.s, p0/z, %[$1] \n\t"')
dnl
dnl float_sve_asm_add / _sub / _mul / _div (ZA, ZB, -, -, BASE, INDEX)
dnl   zZA = zZA op zZB (merging predication), then zZA is stored to
dnl   [%[BASE], INDEX, lsl #2].  Arguments 3 and 4 are not referenced.
define(`float_sve_asm_add', `
        "fadd z$1.s, p0/m, z$1.s, z$2.s \n\t"
        "st1w z$1.s, p0, [%[$5], $6, lsl #2] \n\t"')
define(`float_sve_asm_sub', `
        "fsub z$1.s, p0/m, z$1.s, z$2.s \n\t"
        "st1w z$1.s, p0, [%[$5], $6, lsl #2] \n\t"')
define(`float_sve_asm_mul', `
        "fmul z$1.s, p0/m, z$1.s, z$2.s \n\t"
        "st1w z$1.s, p0, [%[$5], $6, lsl #2] \n\t"')
define(`float_sve_asm_div', `
        "fdiv z$1.s, p0/m, z$1.s, z$2.s \n\t"
        "st1w z$1.s, p0, [%[$5], $6, lsl #2] \n\t"')
dnl
dnl Fused fragments (ZA, ZB, ZC, -, -, -, BASE, INDEX).  The accumulator
dnl register is overwritten and then stored to [%[BASE], INDEX, lsl #2].
dnl Arguments 4 to 6 are not referenced.
dnl   muladd : fmla  with accumulator zZC and factors zZA, zZB
dnl   addmul : fmla  with accumulator zZA and factors zZB, zZC
dnl   submul : fmls  with accumulator zZA and factors zZB, zZC
dnl   mulsub : fnmls with accumulator zZC and factors zZA, zZB
define(`float_sve_asm_muladd', `
        "fmla z$3.s, p0/m, z$1.s, z$2.s \n\t"
        "st1w z$3.s, p0, [%[$7], $8, lsl #2] \n\t"')
define(`float_sve_asm_addmul', `
        "fmla z$1.s, p0/m, z$2.s, z$3.s \n\t"
        "st1w z$1.s, p0, [%[$7], $8, lsl #2] \n\t"')
define(`float_sve_asm_submul', `
        "fmls z$1.s, p0/m, z$2.s, z$3.s \n\t"
        "st1w z$1.s, p0, [%[$7], $8, lsl #2] \n\t"')
define(`float_sve_asm_mulsub', `
        "fnmls z$3.s, p0/m, z$1.s, z$2.s \n\t"
        "st1w z$3.s, p0, [%[$7], $8, lsl #2] \n\t"')
dnl
dnl float_sve_asm_inc(REG)
dnl   uqincw on REG.
define(`float_sve_asm_inc', `
        "uqincw $1 \n\t"')
dnl
dnl Definitions are complete: switch output back on.
divert`'dnl