Skip to content
Snippets Groups Projects
Select Git revision
  • b146b12b68f0a3a848ba7df5fb166a309f186f4f
  • master default protected
  • enxhi_issue460_remove_TOAR-I_access
  • michael_issue459_preprocess_german_stations
  • sh_pollutants
  • develop protected
  • release_v2.4.0
  • michael_issue450_feat_load-ifs-data
  • lukas_issue457_feat_set-config-paths-as-parameter
  • lukas_issue454_feat_use-toar-statistics-api-v2
  • lukas_issue453_refac_advanced-retry-strategy
  • lukas_issue452_bug_update-proj-version
  • lukas_issue449_refac_load-era5-data-from-toar-db
  • lukas_issue451_feat_robust-apriori-estimate-for-short-timeseries
  • lukas_issue448_feat_load-model-from-path
  • lukas_issue447_feat_store-and-load-local-clim-apriori-data
  • lukas_issue445_feat_data-insight-plot-monthly-distribution
  • lukas_issue442_feat_bias-free-evaluation
  • lukas_issue444_feat_choose-interp-method-cams
  • 414-include-crps-analysis-and-other-ens-verif-methods-or-plots
  • lukas_issue384_feat_aqw-data-handler
  • v2.4.0 protected
  • v2.3.0 protected
  • v2.2.0 protected
  • v2.1.0 protected
  • Kleinert_etal_2022_initial_submission
  • v2.0.0 protected
  • v1.5.0 protected
  • v1.4.0 protected
  • v1.3.0 protected
  • v1.2.1 protected
  • v1.2.0 protected
  • v1.1.0 protected
  • IntelliO3-ts-v1.0_R1-submit
  • v1.0.0 protected
  • v0.12.2 protected
  • v0.12.1 protected
  • v0.12.0 protected
  • v0.11.0 protected
  • v0.10.0 protected
  • IntelliO3-ts-v1.0_initial-submit
41 results

test_linear_model.py

Blame
  • complex_double.m4 11.40 KiB
    divert(`-1')
    dnl From here on the file is processed in diversion -1: the definitions below
    dnl are recorded but the text between them is not written to the output.
    
    dnl complex_doublesve(key)
    dnl   Expands to the stored definition of the macro named complex_doublesve.<key>
    dnl   (the name is built with format and read back with defn).
    dnl complex_doublesve_set(key, value)
    dnl   Defines the macro named complex_doublesve.<key> with value as its text.
    define(`complex_doublesve', `defn(format(``complex_doublesve.%s'', `$1'))')
    define(`complex_doublesve_set', `define(format(``complex_doublesve.%s'', `$1'), `$2')')
    
    dnl Key/value table for the complex-double SVE backend, filled through
    dnl complex_doublesve_set and read back with complex_doublesve(key).
    dnl First group: C type names (vec, intvec, scalpointer).
    dnl Second group: load/store, lane count, index, predicate and unary helpers.
    dnl Third group: the four arithmetic intrinsics.
    dnl NOTE(review): count is svcntd() / 2, presumably because one complex value
    dnl occupies two 64-bit lanes -- confirm against the users of this table.
    dnl
    complex_doublesve_set(`vec',  `svfloat64x2_t')dnl
    complex_doublesve_set(`intvec',  `svuint64_t')dnl
    complex_doublesve_set(`scalpointer',  `double')dnl
    dnl
    complex_doublesve_set(`load',  `svld2_f64')dnl
    complex_doublesve_set(`gatherload',  `svld1_gather_index')dnl
    complex_doublesve_set(`store', `svst2_f64')dnl
    complex_doublesve_set(`count', `svcntd() / 2')dnl
    complex_doublesve_set(`index', `svindex_u64')dnl
    complex_doublesve_set(`true',  `svptrue_b64()')dnl
    complex_doublesve_set(`while', `svwhilelt_b64')dnl
    complex_doublesve_set(`neg',   `svneg_f64_z')dnl
    complex_doublesve_set(`abs',   `svabs_f64_z')dnl
    dnl
    complex_doublesve_set(`add',   `svadd_f64_z')dnl
    complex_doublesve_set(`sub',   `svsub_f64_z')dnl
    complex_doublesve_set(`mul',   `svmul_f64_z')dnl
    complex_doublesve_set(`div',   `svdiv_f64_z')dnl
    dnl
    dnl
    
    dnl complex_double_sve_intr_while_lower(first, limit)
    dnl   Expands to a C statement that assigns svwhilelt_b64(first, limit) to pg.
    dnl   pg is not declared here; it must exist in the surrounding C code.
    define(`complex_double_sve_intr_while_lower',
    `pg = svwhilelt_b64($1, $2);    ')
    
    dnl complex_double_sve_intr_load(array, index, var)
    dnl   Expands to a C declaration of var initialised by a predicated load (pg).
    dnl   With disable_complex_instructions defined: var is an svfloat64x2_t filled
    dnl   by svld2_f64 from &array[index * 2].
    dnl   Otherwise: var is an svfloat64_t filled by svld1_f64 from &array[index / 2].
    dnl   NOTE(review): the two branches scale index differently (* 2 versus / 2);
    dnl   the unit of index and the element type of array are not visible here --
    dnl   confirm against the callers.
    define(`complex_double_sve_intr_load',
    `ifdef(`disable_complex_instructions',
    `    svfloat64x2_t $3 = svld2_f64(pg, (const double *) &$1[$2 * 2]);
    ',
    `    svfloat64_t $3 = svld1_f64(pg, (const double *) &$1[$2 / 2]);  
    ')
    ')
    
    dnl complex_double_sve_intr_dup(scalar, var)
    dnl   Expands to a C declaration of var built from creal(scalar) and cimag(scalar).
    dnl   With disable_complex_instructions defined: an svfloat64x2_t brace-initialised
    dnl   from two svdup_f64 vectors (real part, imaginary part).
    dnl   Otherwise: an svfloat64_t from svdupq_f64(creal, cimag).
    dnl   NOTE(review): brace initialisation of svfloat64x2_t depends on the ACLE
    dnl   version of the compiler in use -- confirm it is still accepted.
    define(`complex_double_sve_intr_dup',
    `ifdef(`disable_complex_instructions',
    `    svfloat64x2_t $2 = {svdup_f64(creal($1)), svdup_f64(cimag($1))};
    ',
    `    svfloat64_t $2 = svdupq_f64( creal($1), cimag($1));  
    ')
    ')
    
    dnl complex_double_sve_intr_add(a, b, dst, index)
    dnl   Expands to C code that adds a and b under predicate pg and stores the result.
    dnl   With disable_complex_instructions defined: the .v0 and .v1 members are added
    dnl   separately (locals re, im), packed into res and stored with svst2_f64 at
    dnl   &dst[index * 2].
    dnl   Otherwise: one svadd_f64_z into res, stored with svst1_f64 at &dst[index / 2].
    dnl   The expansion declares locals in the enclosing C scope (re, im, res).
    define(`complex_double_sve_intr_add', 
    `ifdef(`disable_complex_instructions',
    `	svfloat64_t re = svadd_f64_z(pg, $1.v0, $2.v0);
    	svfloat64_t im = svadd_f64_z(pg, $1.v1, $2.v1);
    	svfloat64x2_t res = {re, im};
    
    	svst2_f64(pg, (double *) &$3[$4 * 2], res);	
    ',
    `	svfloat64_t res = svadd_f64_z(pg, $1, $2);
    
    	svst1_f64(pg, (double *) &$3[$4 / 2], res);	
    ')
    ')
    
    dnl complex_double_sve_intr_sub(a, b, dst, index)
    dnl   Expands to C code that computes a - b under predicate pg and stores it.
    dnl   With disable_complex_instructions defined: .v0 and .v1 are subtracted
    dnl   separately (locals re, im), packed into res and stored with svst2_f64 at
    dnl   &dst[index * 2].
    dnl   Otherwise: one svsub_f64_z into res, stored with svst1_f64 at &dst[index / 2].
    define(`complex_double_sve_intr_sub', 
    `ifdef(`disable_complex_instructions',
    `	svfloat64_t re = svsub_f64_z(pg, $1.v0, $2.v0);
    	svfloat64_t im = svsub_f64_z(pg, $1.v1, $2.v1);
    	svfloat64x2_t res = {re, im};
    
    	svst2_f64(pg, (double *) &$3[$4 * 2], res);	
    ',
    `	svfloat64_t res = svsub_f64_z(pg, $1, $2);
    
    	svst1_f64(pg, (double *) &$3[$4 / 2], res);	
    ')
    ')
    
    dnl complex_double_sve_intr_mul(a, b, dst, index)
    dnl   Expands to C code that multiplies a by b under predicate pg and stores it.
    dnl   With disable_complex_instructions defined (separate .v0/.v1 parts):
    dnl     re = a.v0*b.v0 - a.v1*b.v1   (svmul then svmls)
    dnl     im = a.v0*b.v1 + a.v1*b.v0   (svmul then svmla)
    dnl   stored with svst2_f64 at &dst[index * 2].
    dnl   Otherwise: two svcmla_f64_z calls with rotations 0 and 90, each accumulating
    dnl   onto a zero vector, are summed with svadd_f64_z and stored with svst1_f64 at
    dnl   &dst[index / 2].
    dnl   Locals declared by the expansion: retemp, imtemp, re, im, res or
    dnl   zero, res_re, res_im, res.
    define(`complex_double_sve_intr_mul', 
    `ifdef(`disable_complex_instructions',
    `	svfloat64_t retemp = svmul_f64_z(pg, $1.v0, $2.v0);
    	svfloat64_t imtemp = svmul_f64_z(pg, $1.v0, $2.v1);
    	svfloat64_t re = svmls_f64_z(pg, retemp, $1.v1, $2.v1); 
    	svfloat64_t im = svmla_f64_z(pg, imtemp, $1.v1, $2.v0); 
    	svfloat64x2_t res = {re, im};
    
    	svst2_f64(pg, (double *) &$3[$4 * 2], res);	
    ',
    `	svfloat64_t zero = svdup_f64(0.0);
    	svfloat64_t res_re = svcmla_f64_z(pg, zero, $1, $2, 0);
        svfloat64_t res_im = svcmla_f64_z(pg, zero, $1, $2, 90);
        svfloat64_t res = svadd_f64_z(pg, res_re, res_im);
    
    	svst1_f64(pg, (double *) &$3[$4 / 2], res);	
    ')
    ')
    
    dnl complex_double_sve_intr_div(a, b, dst, index)
    dnl   Unfinished placeholder. The expansion starts with the bare word TODO, so the
    dnl   generated C does not compile as written.
    dnl   NOTE(review): the body is a lane-wise svdiv_f64_z, which is not a complex
    dnl   division, it has no disable_complex_instructions branch, and it stores at
    dnl   &dst[index] without the index scaling used by the sibling macros -- all of
    dnl   this needs to be settled when the macro is implemented.
    define(`complex_double_sve_intr_div', 
    `TODO	svfloat64_t res = svdiv_f64_z(pg, $1, $2);
    
    	svst1_f64(pg, (double *) &$3[$4], res);	
    ')
    
    dnl complex_double_sve_intr_muladd(a, b, c, dst, index)
    dnl   Expands to C code that computes a*b + c under predicate pg and stores it.
    dnl   With disable_complex_instructions defined: the product is formed from the
    dnl   .v0/.v1 parts (re = a.v0*b.v0 - a.v1*b.v1, im = a.v0*b.v1 + a.v1*b.v0),
    dnl   c.v0 and c.v1 are added, and the pair is stored with svst2_f64 at
    dnl   &dst[index * 2].
    dnl   Otherwise: svcmla_f64_z with rotation 0 accumulates onto zero, the rotation
    dnl   90 call accumulates onto c, and the two are summed and stored with svst1_f64
    dnl   at &dst[index / 2].
    define(`complex_double_sve_intr_muladd',
    `ifdef(`disable_complex_instructions',
    `	svfloat64_t retemp = svmul_f64_z(pg, $1.v0, $2.v0);
    	svfloat64_t imtemp = svmul_f64_z(pg, $1.v0, $2.v1);
    	svfloat64_t re = svmls_f64_z(pg, retemp, $1.v1, $2.v1); 
    	svfloat64_t im = svmla_f64_z(pg, imtemp, $1.v1, $2.v0); 
    	re = svadd_f64_z(pg, re, $3.v0);
    	im = svadd_f64_z(pg, im, $3.v1);
    	svfloat64x2_t res = {re, im};
    
    	svst2_f64(pg, (double *) &$4[$5 * 2], res);	
    ',
    `	svfloat64_t zero = svdup_f64(0.0);
    	svfloat64_t res_re = svcmla_f64_z(pg, zero, $1, $2, 0);
        svfloat64_t res_im = svcmla_f64_z(pg, $3, $1, $2, 90);
        svfloat64_t res = svadd_f64_z(pg, res_re, res_im);
    
    	svst1_f64(pg, (double *) &$4[$5 / 2], res);	
    ')
    ')
    
    dnl complex_double_sve_intr_addmul(a, b, c, dst, index)
    dnl   Expands to C code that computes a + b*c under predicate pg and stores it.
    dnl   Same structure as complex_double_sve_intr_muladd, with the addend taken from
    dnl   the first argument and the factors from the second and third.
    dnl   Store target: &dst[index * 2] via svst2_f64 when
    dnl   disable_complex_instructions is defined, &dst[index / 2] via svst1_f64
    dnl   otherwise.
    define(`complex_double_sve_intr_addmul',
    `ifdef(`disable_complex_instructions',
    `	svfloat64_t retemp = svmul_f64_z(pg, $2.v0, $3.v0);
    	svfloat64_t imtemp = svmul_f64_z(pg, $2.v0, $3.v1);
    	svfloat64_t re = svmls_f64_z(pg, retemp, $2.v1, $3.v1); 
    	svfloat64_t im = svmla_f64_z(pg, imtemp, $2.v1, $3.v0); 
    	re = svadd_f64_z(pg, re, $1.v0);
    	im = svadd_f64_z(pg, im, $1.v1);
    	svfloat64x2_t res = {re, im};
    
    	svst2_f64(pg, (double *) &$4[$5 * 2], res);	
    ',
    `	svfloat64_t zero = svdup_f64(0.0);
    	svfloat64_t res_re = svcmla_f64_z(pg, zero, $2, $3, 0);
        svfloat64_t res_im = svcmla_f64_z(pg, $1, $2, $3, 90);
        svfloat64_t res = svadd_f64_z(pg, res_re, res_im);
    
    	svst1_f64(pg, (double *) &$4[$5 / 2], res);	
    ')
    ')
    
    dnl complex_double_sve_intr_mulsub(a, b, c, dst, index)
    dnl   Expands to C code that computes a*b - c under predicate pg and stores it.
    dnl   With disable_complex_instructions defined: the product is formed from the
    dnl   .v0/.v1 parts, c.v0 and c.v1 are subtracted, and the pair is stored with
    dnl   svst2_f64 at &dst[index * 2].
    dnl   Otherwise: the negated c is used as the accumulator of the rotation 90
    dnl   svcmla_f64_z, the rotation 0 call accumulates onto zero, and the sum is
    dnl   stored with svst1_f64 at &dst[index / 2].
    dnl   The negation goes into the local named negated, so the operand passed as c
    dnl   is left unmodified in both branches (it used to be overwritten in the second
    dnl   branch, flipping its sign on every expansion that reused it).
    define(`complex_double_sve_intr_mulsub',
    `ifdef(`disable_complex_instructions',
    `	svfloat64_t retemp = svmul_f64_z(pg, $1.v0, $2.v0);
    	svfloat64_t imtemp = svmul_f64_z(pg, $1.v0, $2.v1);
    	svfloat64_t re = svmls_f64_z(pg, retemp, $1.v1, $2.v1); 
    	svfloat64_t im = svmla_f64_z(pg, imtemp, $1.v1, $2.v0); 
    	re = svsub_f64_z(pg, re, $3.v0);
    	im = svsub_f64_z(pg, im, $3.v1);
    	svfloat64x2_t res = {re, im};
    
    	svst2_f64(pg, (double *) &$4[$5 * 2], res);	
    ',
    `	svfloat64_t zero = svdup_f64(0.0);
    	svfloat64_t negated = svneg_f64_z(pg, $3);
        svfloat64_t res_re = svcmla_f64_z(pg, zero, $1, $2, 0);
        svfloat64_t res_im = svcmla_f64_z(pg, negated, $1, $2, 90);
        svfloat64_t res = svadd_f64_z(pg, res_re, res_im);
    
    	svst1_f64(pg, (double *) &$4[$5 / 2], res);	
    ')
    ')
    
    dnl complex_double_sve_intr_submul(a, b, c, dst, index)
    dnl   Expands to C code that computes a - b*c under predicate pg and stores it.
    dnl   With disable_complex_instructions defined: the product b*c is formed from
    dnl   the .v0/.v1 parts and subtracted from a.v0 / a.v1, then stored with
    dnl   svst2_f64 at &dst[index * 2].
    dnl   Otherwise: b is negated, the rotation 0 svcmla_f64_z accumulates onto zero,
    dnl   the rotation 90 call accumulates onto a, and the sum is stored with
    dnl   svst1_f64 at &dst[index / 2].
    dnl   The negation goes into the local named negated, so the operand passed as b
    dnl   is left unmodified in both branches (it used to be overwritten in the second
    dnl   branch, flipping its sign on every expansion that reused it).
    define(`complex_double_sve_intr_submul',
    `ifdef(`disable_complex_instructions',
    `	svfloat64_t retemp = svmul_f64_z(pg, $2.v0, $3.v0);
    	svfloat64_t imtemp = svmul_f64_z(pg, $2.v0, $3.v1);
    	svfloat64_t re = svmls_f64_z(pg, retemp, $2.v1, $3.v1); 
    	svfloat64_t im = svmla_f64_z(pg, imtemp, $2.v1, $3.v0); 
    	re = svsub_f64_z(pg, $1.v0, re);
    	im = svsub_f64_z(pg, $1.v1, im);
    	svfloat64x2_t res = {re, im};
    
    	svst2_f64(pg, (double *) &$4[$5 * 2], res);	
    ',
    `	svfloat64_t zero = svdup_f64(0.0);
    	svfloat64_t negated = svneg_f64_z(pg, $2);
        svfloat64_t res_re = svcmla_f64_z(pg, zero, negated, $3, 0);
        svfloat64_t res_im = svcmla_f64_z(pg, $1, negated, $3, 90);
        svfloat64_t res = svadd_f64_z(pg, res_re, res_im);
    
    	svst1_f64(pg, (double *) &$4[$5 / 2], res);	
    ')
    ')
    
    dnl complex_double_sve_intr_inc(counter)
    dnl   Expands to a C statement that adds svcntd() / 2 to counter.
    dnl   The same step is emitted whether or not disable_complex_instructions is
    dnl   defined.
    define(`complex_double_sve_intr_inc',
    `   $1 += svcntd() / 2;')
    
    dnl complex_double_sve_intr_any(pred)
    dnl   Expands to the C expression svptest_any(svptrue_b64(), pred).
    dnl   No trailing semicolon is emitted, so it can be used as a condition.
    define(`complex_double_sve_intr_any',
    `   svptest_any(svptrue_b64(), $1)')
    
    
    dnl complex_double_sve_asm_while_lower(first, limit)
    dnl   Expands to one inline-asm string literal: whilelo p0.d, first, limit.
    dnl   The predicate register is hard-coded as p0; all other asm macros in this
    dnl   file use p0 as their governing predicate.
    define(`complex_double_sve_asm_while_lower',
    `    "whilelo p0.d, $1, $2                   \n\t"')
    
    dnl complex_double_sve_asm_load(base, index, zreg [, zreg2])
    dnl   Expands to one inline-asm string literal that loads under p0/z from
    dnl   [%[base], index, lsl #3].
    dnl   With disable_complex_instructions defined: ld2d into the register pair
    dnl   z<zreg>, z<zreg2> (fourth argument required).
    dnl   Otherwise: ld1d into z<zreg> only; the fourth argument is not used.
    dnl   base is the name of an asm operand; zreg and zreg2 are bare register numbers.
    define(`complex_double_sve_asm_load',
    `ifdef(`disable_complex_instructions',
    `    "ld2d {z$3.d, z$4.d}, p0/z, [%[$1], $2, lsl #3]  \n\t"',
    `    "ld1d z$3.d, p0/z, [%[$1], $2, lsl #3]  \n\t"')')
    
    dnl complex_double_sve_asm_dup(name, zreg [, zreg2])
    dnl   Expands to inline-asm string literals that broadcast a scalar under p0/z.
    dnl   With disable_complex_instructions defined: two ld1rd instructions, reading
    dnl   the asm operands named <name>real and <name>imag into z<zreg> and z<zreg2>.
    dnl   Otherwise: one ld1rqd from the asm operand <name> into z<zreg>.
    dnl   The operand names must be provided by the enclosing asm statement.
    define(`complex_double_sve_asm_dup',
    `ifdef(`disable_complex_instructions',
    `     "ld1rd z$2.d, p0/z, %[$1real]                \n\t"
         "ld1rd z$3.d, p0/z, %[$1imag]                \n\t"',
    `    "ld1rqd z$2.d, p0/z, %[$1]              \n\t"')')
    
    dnl complex_double_sve_asm_mul(...)
    dnl   Expands to inline-asm string literals for a complex multiply and store.
    dnl   With disable_complex_instructions defined the arguments are
    dnl   (a_re, a_im, b_re, b_im, dst, index): z5 and z6 are zeroed, then
    dnl     z5 = a_re*b_re - a_im*b_im,  z6 = a_re*b_im + a_im*b_re
    dnl   and the pair is stored with st2d at [%[dst], index, lsl #3].
    dnl   Otherwise only arguments 1, 2, 5 and 6 are used (a, b, dst, index): z4 is
    dnl   zeroed, two fcmla (rotations 0 and 90) accumulate into it, and it is stored
    dnl   with st1d.
    dnl   Scratch registers written: z5, z6 and x2, or z4.
    dnl   NOTE(review): lsl x2, index, #1 writes x2, but no instruction in this
    dnl   expansion reads x2 afterwards (the st2d indexes with the original index) --
    dnl   confirm whether the store was meant to use x2 or the lsl is leftover.
    define(`complex_double_sve_asm_mul',
    `ifdef(`disable_complex_instructions',
    `    "fmov z5.d, p0/m, #0.0                  \n\t"
        "fmov z6.d, p0/m, #0.0                  \n\t"
        "fmla z5.d, p0/m, z$1.d, z$3.d                  \n\t"
        "fmla z6.d, p0/m, z$1.d, z$4.d                  \n\t"
        "fmls z5.d, p0/m, z$2.d, z$4.d                  \n\t"
        "fmla z6.d, p0/m, z$2.d, z$3.d                  \n\t"
        "lsl x2, $6, #1                                 \n\t"
        "st2d {z5.d, z6.d}, p0, [%[$5], $6, lsl #3]    \n\t"',
    `    "fmov z4.d, p0/m, #0.0                  \n\t"
        "fcmla z4.d, p0/m, z$1.d, z$2.d, 0      \n\t"
        "fcmla z4.d, p0/m, z$1.d, z$2.d, 90     \n\t"
        "st1d z4.d, p0, [%[$5], $6, lsl #3]     \n\t"')')
    
    dnl complex_double_sve_asm_div
    dnl   Unfinished placeholder: the expansion is a string literal holding the words
    dnl   to do instead of instructions, and no arguments are used.
    dnl   NOTE(review): presumably the assembler rejects this text, which keeps the
    dnl   missing implementation from going unnoticed -- confirm.
    define(`complex_double_sve_asm_div',
    `    "         to do         \n\t"')
    
    dnl complex_double_sve_asm_add(...)
    dnl   Expands to inline-asm string literals for a complex add and store.
    dnl   With disable_complex_instructions defined the arguments are
    dnl   (a_re, a_im, b_re, b_im, dst, index): z<a_re> += z<b_re>, z<a_im> += z<b_im>,
    dnl   then st2d of the pair at [%[dst], index, lsl #3].
    dnl   Otherwise arguments 1, 2, 5 and 6 are used (a, b, dst, index):
    dnl   z<a> += z<b>, then st1d.
    dnl   The sum is written into the first operand register(s), overwriting them.
    dnl   NOTE(review): lsl x2, index, #1 writes x2 but nothing in this expansion
    dnl   reads it -- confirm whether it is leftover.
    define(`complex_double_sve_asm_add',
    `ifdef(`disable_complex_instructions',
    `    "fadd z$1.d, p0/m, z$1.d, z$3.d         \n\t"
        "fadd z$2.d, p0/m, z$2.d, z$4.d         \n\t"
        "lsl x2, $6, #1                                 \n\t"
        "st2d {z$1.d, z$2.d}, p0, [%[$5], $6, lsl #3]    \n\t"',
    `    "fadd z$1.d, p0/m, z$1.d, z$2.d         \n\t"
        "st1d z$1.d, p0, [%[$5], $6, lsl #3]    \n\t"')')
    
    dnl complex_double_sve_asm_sub(...)
    dnl   Expands to inline-asm string literals for a complex subtract and store.
    dnl   With disable_complex_instructions defined the arguments are
    dnl   (a_re, a_im, b_re, b_im, dst, index): z<a_re> -= z<b_re>, z<a_im> -= z<b_im>,
    dnl   then st2d of the pair at [%[dst], index, lsl #3].
    dnl   Otherwise arguments 1, 2, 5 and 6 are used (a, b, dst, index):
    dnl   z<a> -= z<b>, then st1d.
    dnl   The difference is written into the first operand register(s), overwriting
    dnl   them. The st2d address has the same form as in complex_double_sve_asm_add
    dnl   (it used to carry a stray closing bracket after the base operand).
    dnl   NOTE(review): lsl x2, index, #1 writes x2 but nothing in this expansion
    dnl   reads it -- confirm whether it is leftover.
    define(`complex_double_sve_asm_sub',
    `ifdef(`disable_complex_instructions',
    `    "fsub z$1.d, p0/m, z$1.d, z$3.d         \n\t"
        "fsub z$2.d, p0/m, z$2.d, z$4.d         \n\t"
        "lsl x2, $6, #1                                 \n\t"
        "st2d {z$1.d, z$2.d}, p0, [%[$5], $6, lsl #3]    \n\t"',
    `    "fsub z$1.d, p0/m, z$1.d, z$2.d         \n\t"
        "st1d z$1.d, p0, [%[$5], $6, lsl #3]    \n\t"')')
    
    dnl complex_double_sve_asm_muladd(...)
    dnl   Expands to inline-asm string literals computing a*b + c and storing it.
    dnl   With disable_complex_instructions defined the arguments are
    dnl   (a_re, a_im, b_re, b_im, c_re, c_im, dst, index): z7/z8 are zeroed, receive
    dnl   the real/imaginary parts of a*b, then c_re/c_im are added and the pair is
    dnl   stored with st2d at [%[dst], index, lsl #3].
    dnl   Otherwise arguments 1, 2, 3, 7 and 8 are used (a, b, c, dst, index): two
    dnl   fcmla (rotations 0 and 90) accumulate a*b directly into z<c>, which is then
    dnl   stored with st1d; the register holding c is overwritten.
    dnl   Scratch registers written in the first branch: z7, z8, x2.
    dnl   NOTE(review): lsl x2, index, #1 writes x2 but nothing in this expansion
    dnl   reads it -- confirm whether it is leftover.
    define(`complex_double_sve_asm_muladd',
    `ifdef(`disable_complex_instructions',
    `    "fmov z7.d, p0/m, #0.0                  \n\t"
        "fmov z8.d, p0/m, #0.0                  \n\t"
        "fmla z7.d, p0/m, z$1.d, z$3.d          \n\t"
        "fmla z8.d, p0/m, z$1.d, z$4.d          \n\t"
        "fmls z7.d, p0/m, z$2.d, z$4.d          \n\t"
        "fmla z8.d, p0/m, z$2.d, z$3.d          \n\t"
        "fadd z7.d, p0/m, z7.d, z$5.d         \n\t"
        "fadd z8.d, p0/m, z8.d, z$6.d         \n\t"
        "lsl x2, $8, #1                                 \n\t"
        "st2d {z7.d, z8.d}, p0, [%[$7], $8, lsl #3]    \n\t"',
    `    "fcmla z$3.d, p0/m, z$1.d, z$2.d, 0     \n\t"
        "fcmla z$3.d, p0/m, z$1.d, z$2.d, 90    \n\t"
        "st1d z$3.d, p0, [%[$7], $8, lsl #3]    \n\t"')')
    
    dnl complex_double_sve_asm_addmul(...)
    dnl   Expands to inline-asm string literals computing a + b*c and storing it.
    dnl   With disable_complex_instructions defined the arguments are
    dnl   (a_re, a_im, b_re, b_im, c_re, c_im, dst, index): z7/z8 are zeroed, receive
    dnl   the real/imaginary parts of b*c, then a_re/a_im are added and the pair is
    dnl   stored with st2d at [%[dst], index, lsl #3].
    dnl   Otherwise arguments 1, 2, 3, 7 and 8 are used (a, b, c, dst, index): two
    dnl   fcmla (rotations 0 and 90) accumulate b*c directly into z<a>, which is then
    dnl   stored with st1d; the register holding a is overwritten.
    dnl   Scratch registers written in the first branch: z7, z8, x2.
    dnl   NOTE(review): lsl x2, index, #1 writes x2 but nothing in this expansion
    dnl   reads it -- confirm whether it is leftover.
    define(`complex_double_sve_asm_addmul',
    `ifdef(`disable_complex_instructions',
    `    "fmov z7.d, p0/m, #0.0                  \n\t"
        "fmov z8.d, p0/m, #0.0                  \n\t"
        "fmla z7.d, p0/m, z$3.d, z$5.d          \n\t"
        "fmla z8.d, p0/m, z$3.d, z$6.d          \n\t"
        "fmls z7.d, p0/m, z$4.d, z$6.d          \n\t"
        "fmla z8.d, p0/m, z$4.d, z$5.d          \n\t"
        "fadd z7.d, p0/m, z7.d, z$1.d         \n\t"
        "fadd z8.d, p0/m, z8.d, z$2.d         \n\t"
        "lsl x2, $8, #1                                 \n\t"
        "st2d {z7.d, z8.d}, p0, [%[$7], $8, lsl #3]    \n\t"',
    `    "fcmla z$1.d, p0/m, z$2.d, z$3.d, 0     \n\t"
        "fcmla z$1.d, p0/m, z$2.d, z$3.d, 90    \n\t"
        "st1d z$1.d, p0, [%[$7], $8, lsl #3]    \n\t"')')
    
    dnl complex_double_sve_asm_mulsub(...)
    dnl   Expands to inline-asm string literals computing a*b - c and storing it.
    dnl   With disable_complex_instructions defined the arguments are
    dnl   (a_re, a_im, b_re, b_im, c_re, c_im, dst, index): z7/z8 are zeroed, receive
    dnl   the real/imaginary parts of a*b, then c_re/c_im are subtracted and the pair
    dnl   is stored with st2d at [%[dst], index, lsl #3].
    dnl   Otherwise arguments 1, 2, 3, 7 and 8 are used (a, b, c, dst, index): z4
    dnl   receives the negated c, two fcmla (rotations 0 and 90) accumulate a*b into
    dnl   it, and z4 is stored with st1d. The input registers are not modified.
    dnl   Scratch registers written: z7, z8 and x2, or z4.
    dnl   NOTE(review): lsl x2, index, #1 writes x2 but nothing in this expansion
    dnl   reads it -- confirm whether it is leftover.
    define(`complex_double_sve_asm_mulsub',
    `ifdef(`disable_complex_instructions',
    `    "fmov z7.d, p0/m, #0.0                  \n\t"
        "fmov z8.d, p0/m, #0.0                  \n\t"
        "fmla z7.d, p0/m, z$1.d, z$3.d          \n\t"
        "fmla z8.d, p0/m, z$1.d, z$4.d          \n\t"
        "fmls z7.d, p0/m, z$2.d, z$4.d          \n\t"
        "fmla z8.d, p0/m, z$2.d, z$3.d          \n\t"
        "fsub z7.d, p0/m, z7.d, z$5.d         \n\t"
        "fsub z8.d, p0/m, z8.d, z$6.d         \n\t"
        "lsl x2, $8, #1                                 \n\t"
        "st2d {z7.d, z8.d}, p0, [%[$7], $8, lsl #3]    \n\t"',
    `    "fneg z4.d, p0/m, z$3.d                \n\t"
        "fcmla z4.d, p0/m, z$1.d, z$2.d, 0     \n\t"
        "fcmla z4.d, p0/m, z$1.d, z$2.d, 90    \n\t"
        "st1d z4.d, p0, [%[$7], $8, lsl #3]    \n\t"')')
    
    dnl complex_double_sve_asm_submul(...)
    dnl   Expands to inline-asm string literals computing a - b*c and storing it.
    dnl   With disable_complex_instructions defined the arguments are
    dnl   (a_re, a_im, b_re, b_im, c_re, c_im, dst, index), the same layout as
    dnl   complex_double_sve_asm_addmul. z7/z8 are zeroed and receive the negated
    dnl   product directly:
    dnl     z7 = -(b_re*c_re) + b_im*c_im      (fmls, fmla)
    dnl     z8 = -(b_re*c_im) - b_im*c_re      (fmls, fmls)
    dnl   then a_re/a_im are added and the pair is stored with st2d at
    dnl   [%[dst], index, lsl #3]. No input register is modified. (The previous
    dnl   version negated z<a_im> and z<b_re> in place, which corrupted both inputs
    dnl   and produced a wrong result.)
    dnl   Otherwise arguments 1, 2, 3, 7 and 8 are used (a, b, c, dst, index): z4
    dnl   receives the negated b, two fcmla (rotations 0 and 90) accumulate z4*c into
    dnl   z<a>, which is then stored with st1d; the register holding a is overwritten.
    dnl   Scratch registers written: z7, z8 and x2, or z4.
    dnl   NOTE(review): lsl x2, index, #1 writes x2 but nothing in this expansion
    dnl   reads it -- confirm whether it is leftover.
    define(`complex_double_sve_asm_submul',
    `ifdef(`disable_complex_instructions',
    `    "fmov z7.d, p0/m, #0.0                  \n\t"
        "fmov z8.d, p0/m, #0.0                  \n\t"
        "fmls z7.d, p0/m, z$3.d, z$5.d          \n\t"
        "fmls z8.d, p0/m, z$3.d, z$6.d          \n\t"
        "fmla z7.d, p0/m, z$4.d, z$6.d          \n\t"
        "fmls z8.d, p0/m, z$4.d, z$5.d          \n\t"
        "fadd z7.d, p0/m, z7.d, z$1.d         \n\t"
        "fadd z8.d, p0/m, z8.d, z$2.d         \n\t"
        "lsl x2, $8, #1                                 \n\t"
        "st2d {z7.d, z8.d}, p0, [%[$7], $8, lsl #3]    \n\t"',
    `    "fneg z4.d, p0/m, z$2.d                 \n\t"
        "fcmla z$1.d, p0/m, z4.d, z$3.d, 0      \n\t"
        "fcmla z$1.d, p0/m, z4.d, z$3.d, 90     \n\t"
        "st1d z$1.d, p0, [%[$7], $8, lsl #3]    \n\t"')')
    
    dnl complex_double_sve_asm_inc(reg)
    dnl   Expands to inline-asm string literals that advance reg with uqincd.
    dnl   With disable_complex_instructions defined uqincd is emitted twice,
    dnl   otherwise once.
    dnl   NOTE(review): presumably the double step matches the two vectors moved by
    dnl   each ld2d/st2d in the split layout -- confirm against the loop that uses it.
    define(`complex_double_sve_asm_inc',
    `ifdef(`disable_complex_instructions',
    `    "uqincd $1                              \n\t"
        "uqincd $1                              \n\t"',
    `    "uqincd $1                              \n\t"')')
    
    
    
    dnl End of the definitions: divert without arguments switches back to
    dnl diversion 0, so text after this file is written to the output again.
    divert`'dnl