    dnl complex_double.m4: m4 macros that generate SVE code (ACLE intrinsics and inline assembly) for double-precision complex arithmetic
    divert(`-1')
    
    define(`complex_doublesve', `defn(format(``complex_doublesve.%s'', `$1'))')
    define(`complex_doublesve_set', `define(format(``complex_doublesve.%s'', `$1'), `$2')')
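    dnl
    dnl The two helpers above implement a small property table: each key is
    dnl stored as a macro named `complex_doublesve.<key>'. For illustration:
    dnl
    dnl     complex_doublesve_set(`vec', `svfloat64x2_t')
    dnl     complex_doublesve(`vec')   => svfloat64x2_t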
    
    dnl
    complex_doublesve_set(`vec',  `svfloat64x2_t')dnl
    complex_doublesve_set(`intvec',  `svuint64_t')dnl
    complex_doublesve_set(`scalpointer',  `double')dnl
    dnl
    complex_doublesve_set(`load',  `svld2_f64')dnl
    complex_doublesve_set(`gatherload',  `svld1_gather_index')dnl
    complex_doublesve_set(`store', `svst2_f64')dnl
    complex_doublesve_set(`count', `svcntd() / 2')dnl
    complex_doublesve_set(`index', `svindex_u64')dnl
    complex_doublesve_set(`true',  `svptrue_b64()')dnl
    complex_doublesve_set(`while', `svwhilelt_b64')dnl
    complex_doublesve_set(`neg',   `svneg_f64_z')dnl
    complex_doublesve_set(`abs',   `svabs_f64_z')dnl
    dnl
    complex_doublesve_set(`add',   `svadd_f64_z')dnl
    complex_doublesve_set(`sub',   `svsub_f64_z')dnl
    complex_doublesve_set(`mul',   `svmul_f64_z')dnl
    complex_doublesve_set(`div',   `svdiv_f64_z')dnl
    dnl
    dnl
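    dnl A generic driver can look these properties up by name; a minimal,
    dnl hypothetical use in a strip-mined loop header:
    dnl
    dnl     svbool_t pg = complex_doublesve(`while')(i, n);  /* -> svwhilelt_b64(i, n) */
    dnl
    dnl `count' is svcntd() / 2 because one complex double occupies two
    dnl 64-bit lanes.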
    
    define(`complex_double_sve_intr_while_lower',
    `pg = svwhilelt_b64($1, $2);    ')
    
    define(`complex_double_sve_intr_load',
    `ifdef(`disable_complex_instructions',
    `    svfloat64x2_t $3 = svld2_f64(pg, (const double *) &$1[$2 * 2]);
    ',
    `    svfloat64_t $3 = svld1_f64(pg, (const double *) &$1[$2 / 2]);  
    ')
    ')
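    dnl For illustration, with hypothetical arguments (src, i, va) the macro
    dnl above expands to one of the following, depending on whether
    dnl disable_complex_instructions is defined:
    dnl
    dnl     svfloat64x2_t va = svld2_f64(pg, (const double *) &src[i * 2]);  /* de-interleaved re/im */
    dnl     svfloat64_t va = svld1_f64(pg, (const double *) &src[i / 2]);    /* interleaved re,im,... */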
    
    define(`complex_double_sve_intr_dup',
    `ifdef(`disable_complex_instructions',
    `    svfloat64x2_t $2 = svcreate2_f64(svdup_f64(creal($1)), svdup_f64(cimag($1)));
    ',
    `    svfloat64_t $2 = svdupq_f64(creal($1), cimag($1));
    ')
    ')
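    dnl svdupq_f64(creal(z), cimag(z)) replicates the 128-bit {re, im} pair
    dnl across the whole vector, so every complex lane holds z; the fallback
    dnl keeps the real and imaginary parts in two separate broadcast vectors.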
    
    define(`complex_double_sve_intr_add',
    `ifdef(`disable_complex_instructions',
    `	svfloat64_t re = svadd_f64_z(pg, svget2_f64($1, 0), svget2_f64($2, 0));
    	svfloat64_t im = svadd_f64_z(pg, svget2_f64($1, 1), svget2_f64($2, 1));
    	svfloat64x2_t res = svcreate2_f64(re, im);
    
    	svst2_f64(pg, (double *) &$3[$4 * 2], res);
    ',
    `	svfloat64_t res = svadd_f64_z(pg, $1, $2);
    
    	svst1_f64(pg, (double *) &$3[$4 / 2], res);
    ')
    ')
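    dnl The intrinsic macros assume a predicate named pg in scope and are
    dnl meant to be pasted into a strip-mined loop. A minimal sketch of a
    dnl caller (hypothetical function vadd, interleaved svld1/svst1 variant,
    dnl index i counting doubles):
    dnl
    dnl     #include <arm_sve.h>
    dnl     #include <complex.h>
    dnl     #include <stdint.h>
    dnl     void vadd(double _Complex *c, const double _Complex *a,
    dnl               const double _Complex *b, uint64_t n /* doubles */)
    dnl     {
    dnl         for (uint64_t i = 0; i < n; i += svcntd()) {
    dnl             svbool_t pg = svwhilelt_b64(i, n);
    dnl             svfloat64_t va = svld1_f64(pg, (const double *) &a[i / 2]);
    dnl             svfloat64_t vb = svld1_f64(pg, (const double *) &b[i / 2]);
    dnl             svst1_f64(pg, (double *) &c[i / 2], svadd_f64_z(pg, va, vb));
    dnl         }
    dnl     }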
    
    define(`complex_double_sve_intr_sub',
    `ifdef(`disable_complex_instructions',
    `	svfloat64_t re = svsub_f64_z(pg, svget2_f64($1, 0), svget2_f64($2, 0));
    	svfloat64_t im = svsub_f64_z(pg, svget2_f64($1, 1), svget2_f64($2, 1));
    	svfloat64x2_t res = svcreate2_f64(re, im);
    
    	svst2_f64(pg, (double *) &$3[$4 * 2], res);
    ',
    `	svfloat64_t res = svsub_f64_z(pg, $1, $2);
    
    	svst1_f64(pg, (double *) &$3[$4 / 2], res);
    ')
    ')
    
    define(`complex_double_sve_intr_mul',
    `ifdef(`disable_complex_instructions',
    `	svfloat64_t retemp = svmul_f64_z(pg, svget2_f64($1, 0), svget2_f64($2, 0));
    	svfloat64_t imtemp = svmul_f64_z(pg, svget2_f64($1, 0), svget2_f64($2, 1));
    	svfloat64_t re = svmls_f64_z(pg, retemp, svget2_f64($1, 1), svget2_f64($2, 1));
    	svfloat64_t im = svmla_f64_z(pg, imtemp, svget2_f64($1, 1), svget2_f64($2, 0));
    	svfloat64x2_t res = svcreate2_f64(re, im);
    
    	svst2_f64(pg, (double *) &$3[$4 * 2], res);
    ',
    `	svfloat64_t zero = svdup_f64(0.0);
    	svfloat64_t rot0 = svcmla_f64_z(pg, zero, $1, $2, 0);
    	svfloat64_t rot90 = svcmla_f64_z(pg, zero, $1, $2, 90);
    	svfloat64_t res = svadd_f64_z(pg, rot0, rot90);
    
    	svst1_f64(pg, (double *) &$3[$4 / 2], res);
    ')
    ')
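    dnl Why two svcmla calls: for a = ar + i*ai and b = br + i*bi,
    dnl a*b = (ar*br - ai*bi) + i*(ar*bi + ai*br). FCMLA with rotation 0
    dnl accumulates the ar*br / ar*bi terms and rotation 90 the -ai*bi /
    dnl ai*br terms, so the sum of the two zero-initialised partial products
    dnl is the full complex product.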
    
    define(`complex_double_sve_intr_div',
    `dnl TODO: svdiv_f64_z below is a lane-wise division, not a complex
    dnl division, and the store index is unscaled (cf. the other macros);
    dnl see the sketch after this definition.
    	svfloat64_t res = svdiv_f64_z(pg, $1, $2);
    
    	svst1_f64(pg, (double *) &$3[$4], res);
    ')
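    dnl A correct complex division follows a/b = a*conj(b) / (br^2 + bi^2).
    dnl A sketch for the interleaved layout (va, vb, pg are placeholder
    dnl names; untested):
    dnl
    dnl     svfloat64_t zero = svdup_f64(0.0);
    dnl     svfloat64_t num = svadd_f64_z(pg,
    dnl         svcmla_f64_z(pg, zero, vb, va, 0),     /* br*ar, br*ai  */
    dnl         svcmla_f64_z(pg, zero, vb, va, 270));  /* bi*ai, -bi*ar */
    dnl     svfloat64_t den = svadd_f64_z(pg,
    dnl         svcmla_f64_z(pg, zero, vb, vb, 0),
    dnl         svcmla_f64_z(pg, zero, vb, vb, 270));  /* |b|^2 in even lanes */
    dnl     den = svtrn1_f64(den, den);                /* copy into odd lanes */
    dnl     svfloat64_t res = svdiv_f64_z(pg, num, den);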
    
    define(`complex_double_sve_intr_muladd',
    `ifdef(`disable_complex_instructions',
    `	svfloat64_t retemp = svmul_f64_z(pg, svget2_f64($1, 0), svget2_f64($2, 0));
    	svfloat64_t imtemp = svmul_f64_z(pg, svget2_f64($1, 0), svget2_f64($2, 1));
    	svfloat64_t re = svmls_f64_z(pg, retemp, svget2_f64($1, 1), svget2_f64($2, 1));
    	svfloat64_t im = svmla_f64_z(pg, imtemp, svget2_f64($1, 1), svget2_f64($2, 0));
    	re = svadd_f64_z(pg, re, svget2_f64($3, 0));
    	im = svadd_f64_z(pg, im, svget2_f64($3, 1));
    	svfloat64x2_t res = svcreate2_f64(re, im);
    
    	svst2_f64(pg, (double *) &$4[$5 * 2], res);
    ',
    `	svfloat64_t zero = svdup_f64(0.0);
    	svfloat64_t rot0 = svcmla_f64_z(pg, zero, $1, $2, 0);
    	svfloat64_t rot90 = svcmla_f64_z(pg, $3, $1, $2, 90);
    	svfloat64_t res = svadd_f64_z(pg, rot0, rot90);
    
    	svst1_f64(pg, (double *) &$4[$5 / 2], res);
    ')
    ')
    
    define(`complex_double_sve_intr_addmul',
    `ifdef(`disable_complex_instructions',
    `	svfloat64_t retemp = svmul_f64_z(pg, svget2_f64($2, 0), svget2_f64($3, 0));
    	svfloat64_t imtemp = svmul_f64_z(pg, svget2_f64($2, 0), svget2_f64($3, 1));
    	svfloat64_t re = svmls_f64_z(pg, retemp, svget2_f64($2, 1), svget2_f64($3, 1));
    	svfloat64_t im = svmla_f64_z(pg, imtemp, svget2_f64($2, 1), svget2_f64($3, 0));
    	re = svadd_f64_z(pg, re, svget2_f64($1, 0));
    	im = svadd_f64_z(pg, im, svget2_f64($1, 1));
    	svfloat64x2_t res = svcreate2_f64(re, im);
    
    	svst2_f64(pg, (double *) &$4[$5 * 2], res);
    ',
    `	svfloat64_t zero = svdup_f64(0.0);
    	svfloat64_t rot0 = svcmla_f64_z(pg, zero, $2, $3, 0);
    	svfloat64_t rot90 = svcmla_f64_z(pg, $1, $2, $3, 90);
    	svfloat64_t res = svadd_f64_z(pg, rot0, rot90);
    
    	svst1_f64(pg, (double *) &$4[$5 / 2], res);
    ')
    ')
    
    define(`complex_double_sve_intr_mulsub',
    `ifdef(`disable_complex_instructions',
    `	svfloat64_t retemp = svmul_f64_z(pg, svget2_f64($1, 0), svget2_f64($2, 0));
    	svfloat64_t imtemp = svmul_f64_z(pg, svget2_f64($1, 0), svget2_f64($2, 1));
    	svfloat64_t re = svmls_f64_z(pg, retemp, svget2_f64($1, 1), svget2_f64($2, 1));
    	svfloat64_t im = svmla_f64_z(pg, imtemp, svget2_f64($1, 1), svget2_f64($2, 0));
    	re = svsub_f64_z(pg, re, svget2_f64($3, 0));
    	im = svsub_f64_z(pg, im, svget2_f64($3, 1));
    	svfloat64x2_t res = svcreate2_f64(re, im);
    
    	svst2_f64(pg, (double *) &$4[$5 * 2], res);
    ',
    `	svfloat64_t zero = svdup_f64(0.0);
    	$3 = svneg_f64_z(pg, $3);
    	svfloat64_t rot0 = svcmla_f64_z(pg, zero, $1, $2, 0);
    	svfloat64_t rot90 = svcmla_f64_z(pg, $3, $1, $2, 90);
    	svfloat64_t res = svadd_f64_z(pg, rot0, rot90);
    
    	svst1_f64(pg, (double *) &$4[$5 / 2], res);
    ')
    ')
    
    define(`complex_double_sve_intr_submul',
    `ifdef(`disable_complex_instructions',
    `	svfloat64_t retemp = svmul_f64_z(pg, svget2_f64($2, 0), svget2_f64($3, 0));
    	svfloat64_t imtemp = svmul_f64_z(pg, svget2_f64($2, 0), svget2_f64($3, 1));
    	svfloat64_t re = svmls_f64_z(pg, retemp, svget2_f64($2, 1), svget2_f64($3, 1));
    	svfloat64_t im = svmla_f64_z(pg, imtemp, svget2_f64($2, 1), svget2_f64($3, 0));
    	re = svsub_f64_z(pg, svget2_f64($1, 0), re);
    	im = svsub_f64_z(pg, svget2_f64($1, 1), im);
    	svfloat64x2_t res = svcreate2_f64(re, im);
    
    	svst2_f64(pg, (double *) &$4[$5 * 2], res);
    ',
    `	svfloat64_t zero = svdup_f64(0.0);
    	$2 = svneg_f64_z(pg, $2);
    	svfloat64_t rot0 = svcmla_f64_z(pg, zero, $2, $3, 0);
    	svfloat64_t rot90 = svcmla_f64_z(pg, $1, $2, $3, 90);
    	svfloat64_t res = svadd_f64_z(pg, rot0, rot90);
    
    	svst1_f64(pg, (double *) &$4[$5 / 2], res);
    ')
    ')
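    dnl Operand conventions of the fused macros above (result stored to $4[$5]):
    dnl   muladd: $1 * $2 + $3        mulsub: $1 * $2 - $3
    dnl   addmul: $1 + $2 * $3        submul: $1 - $2 * $3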
    
    define(`complex_double_sve_intr_inc',
    `   $1 += svcntd() / 2;')
    
    define(`complex_double_sve_intr_any',
    `   svptest_any(svptrue_b64(), $1)')
    
    
    define(`complex_double_sve_asm_while_lower',
    `    "whilelo p0.d, $1, $2                   \n\t"')
    
    define(`complex_double_sve_asm_load',
    `ifdef(`disable_complex_instructions',
    `    "ld2d {z$3.d, z$4.d}, p0/z, [%[$1], $2, lsl #3]  \n\t"',
    `    "ld1d z$3.d, p0/z, [%[$1], $2, lsl #3]  \n\t"')')
    
    define(`complex_double_sve_asm_dup',
    `ifdef(`disable_complex_instructions',
    `     "ld1rd z$2.d, p0/z, %[$1real]                \n\t"
         "ld1rd z$3.d, p0/z, %[$1imag]                \n\t"',
    `    "ld1rqd z$2.d, p0/z, %[$1]              \n\t"')')
    
    define(`complex_double_sve_asm_mul',
    `ifdef(`disable_complex_instructions',
    `    "fmov z5.d, p0/m, #0.0                  \n\t"
        "fmov z6.d, p0/m, #0.0                  \n\t"
        "fmla z5.d, p0/m, z$1.d, z$3.d                  \n\t"
        "fmla z6.d, p0/m, z$1.d, z$4.d                  \n\t"
        "fmls z5.d, p0/m, z$2.d, z$4.d                  \n\t"
        "fmla z6.d, p0/m, z$2.d, z$3.d                  \n\t"
        "lsl x2, $6, #1                                 \n\t"
        "st2d {z5.d, z6.d}, p0, [%[$5], $6, lsl #3]    \n\t"',
    `    "fmov z4.d, p0/m, #0.0                  \n\t"
        "fcmla z4.d, p0/m, z$1.d, z$2.d, 0      \n\t"
        "fcmla z4.d, p0/m, z$1.d, z$2.d, 90     \n\t"
        "st1d z4.d, p0, [%[$5], $6, lsl #3]     \n\t"')')
    
    define(`complex_double_sve_asm_div',
    `    "// TODO: complex division not implemented  \n\t"')
    
    define(`complex_double_sve_asm_add',
    `ifdef(`disable_complex_instructions',
    `    "fadd z$1.d, p0/m, z$1.d, z$3.d         \n\t"
        "fadd z$2.d, p0/m, z$2.d, z$4.d         \n\t"
        "lsl x2, $6, #1                                 \n\t"
        "st2d {z$1.d, z$2.d}, p0, [%[$5], $6, lsl #3]    \n\t"',
    `    "fadd z$1.d, p0/m, z$1.d, z$2.d         \n\t"
        "st1d z$1.d, p0, [%[$5], $6, lsl #3]    \n\t"')')
    
    define(`complex_double_sve_asm_sub',
    `ifdef(`disable_complex_instructions',
    `    "fsub z$1.d, p0/m, z$1.d, z$3.d         \n\t"
        "fsub z$2.d, p0/m, z$2.d, z$4.d         \n\t"
        "lsl x2, $6, #1                                 \n\t"
        "st2d {z$1.d, z$2.d}, p0, [%[$5]], $6, lsl #3]    \n\t"',
    `    "fsub z$1.d, p0/m, z$1.d, z$2.d         \n\t"
        "st1d z$1.d, p0, [%[$5], $6, lsl #3]    \n\t"')')
    
    define(`complex_double_sve_asm_muladd',
    `ifdef(`disable_complex_instructions',
    `    "fmov z7.d, p0/m, #0.0                  \n\t"
        "fmov z8.d, p0/m, #0.0                  \n\t"
        "fmla z7.d, p0/m, z$1.d, z$3.d          \n\t"
        "fmla z8.d, p0/m, z$1.d, z$4.d          \n\t"
        "fmls z7.d, p0/m, z$2.d, z$4.d          \n\t"
        "fmla z8.d, p0/m, z$2.d, z$3.d          \n\t"
        "fadd z7.d, p0/m, z7.d, z$5.d         \n\t"
        "fadd z8.d, p0/m, z8.d, z$6.d         \n\t"
        "lsl x2, $8, #1                                 \n\t"
        "st2d {z7.d, z8.d}, p0, [%[$7], $8, lsl #3]    \n\t"',
    `    "fcmla z$3.d, p0/m, z$1.d, z$2.d, 0     \n\t"
        "fcmla z$3.d, p0/m, z$1.d, z$2.d, 90    \n\t"
        "st1d z$3.d, p0, [%[$7], $8, lsl #3]    \n\t"')')
    
    define(`complex_double_sve_asm_addmul',
    `ifdef(`disable_complex_instructions',
    `    "fmov z7.d, p0/m, #0.0                  \n\t"
        "fmov z8.d, p0/m, #0.0                  \n\t"
        "fmla z7.d, p0/m, z$3.d, z$5.d          \n\t"
        "fmla z8.d, p0/m, z$3.d, z$6.d          \n\t"
        "fmls z7.d, p0/m, z$4.d, z$6.d          \n\t"
        "fmla z8.d, p0/m, z$4.d, z$5.d          \n\t"
        "fadd z7.d, p0/m, z7.d, z$1.d         \n\t"
        "fadd z8.d, p0/m, z8.d, z$2.d         \n\t"
        "lsl x2, $8, #1                                 \n\t"
        "st2d {z7.d, z8.d}, p0, [%[$7], $8, lsl #3]    \n\t"',
    `    "fcmla z$1.d, p0/m, z$2.d, z$3.d, 0     \n\t"
        "fcmla z$1.d, p0/m, z$2.d, z$3.d, 90    \n\t"
        "st1d z$1.d, p0, [%[$7], $8, lsl #3]    \n\t"')')
    
    define(`complex_double_sve_asm_mulsub',
    `ifdef(`disable_complex_instructions',
    `    "fmov z7.d, p0/m, #0.0                  \n\t"
        "fmov z8.d, p0/m, #0.0                  \n\t"
        "fmla z7.d, p0/m, z$1.d, z$3.d          \n\t"
        "fmla z8.d, p0/m, z$1.d, z$4.d          \n\t"
        "fmls z7.d, p0/m, z$2.d, z$4.d          \n\t"
        "fmla z8.d, p0/m, z$2.d, z$3.d          \n\t"
        "fsub z7.d, p0/m, z7.d, z$5.d         \n\t"
        "fsub z8.d, p0/m, z8.d, z$6.d         \n\t"
        "lsl x2, $8, #1                                 \n\t"
        "st2d {z7.d, z8.d}, p0, [%[$7], $8, lsl #3]    \n\t"',
    `    "fneg z4.d, p0/m, z$3.d                \n\t"
        "fcmla z4.d, p0/m, z$1.d, z$2.d, 0     \n\t"
        "fcmla z4.d, p0/m, z$1.d, z$2.d, 90    \n\t"
        "st1d z4.d, p0, [%[$7], $8, lsl #3]    \n\t"')')
    
    define(`complex_double_sve_asm_submul',
    `ifdef(`disable_complex_instructions',
    `    "fmov z7.d, p0/m, #0.0                  \n\t"
        "fmov z8.d, p0/m, #0.0                  \n\t"
        "fneg z$2.d, p0/m, z$2.d                 \n\t"
        "fneg z$3.d, p0/m, z$3.d                 \n\t"
        "fmla z7.d, p0/m, z$3.d, z$5.d          \n\t"
        "fmla z8.d, p0/m, z$3.d, z$6.d          \n\t"
        "fmls z7.d, p0/m, z$4.d, z$6.d          \n\t"
        "fmla z8.d, p0/m, z$4.d, z$5.d          \n\t"
        "fadd z7.d, p0/m, z7.d, z$1.d         \n\t"
        "fadd z8.d, p0/m, z8.d, z$2.d         \n\t"
        "lsl x2, $8, #1                                 \n\t"
        "st2d {z7.d, z8.d}, p0, [%[$7], $8, lsl #3]    \n\t"',
    `    "fneg z4.d, p0/m, z$2.d                 \n\t"
        "fcmla z$1.d, p0/m, z4.d, z$3.d, 0      \n\t"
        "fcmla z$1.d, p0/m, z4.d, z$3.d, 90     \n\t"
        "st1d z$1.d, p0, [%[$7], $8, lsl #3]    \n\t"')')
    
    define(`complex_double_sve_asm_inc',
    `ifdef(`disable_complex_instructions',
    `    "uqincd $1                              \n\t"
        "uqincd $1                              \n\t"',
    `    "uqincd $1                              \n\t"')')
    
    
    
    divert`'dnl