Skip to content
Snippets Groups Projects
Commit 00f232f8 authored by Tyler Smith's avatar Tyler Smith
Browse files

Added single-precision micro-kernel for Knights Corner aka MIC aka Xeon Phi

parent 3fc60e49
No related branches found
No related tags found
No related merge requests found
......@@ -97,7 +97,7 @@
// It is sometimes useful to define the various memory alignments in terms
// of some other characteristics of the system, such as the cache line size
// and the page size.
#define BLIS_CACHE_LINE_SIZE 64
#define BLIS_CACHE_LINE_SIZE 256
#define BLIS_PAGE_SIZE 4096
// Alignment size needed by the instruction set for aligned SIMD/vector
......
......@@ -54,35 +54,42 @@
// (b) NR (for triangular operations such as trmm and trsm).
//
#define BLIS_DEFAULT_MC_S 256
#define BLIS_DEFAULT_KC_S 256
#define BLIS_DEFAULT_NC_S 8192
#define BLIS_DEFAULT_MC_S 240
#define BLIS_DEFAULT_KC_S 240
#define BLIS_DEFAULT_NC_S 9600
#define BLIS_DEFAULT_MC_D 120
#define BLIS_DEFAULT_KC_D 240
#define BLIS_DEFAULT_NC_D 14400
#define BLIS_DEFAULT_MC_C 128
#define BLIS_DEFAULT_KC_C 256
#define BLIS_DEFAULT_NC_C 4096
#define BLIS_DEFAULT_4M_MC_C BLIS_DEFAULT_MC_S
#define BLIS_DEFAULT_4M_KC_C BLIS_DEFAULT_KC_S
#define BLIS_DEFAULT_MC_Z 64
#define BLIS_DEFAULT_KC_Z 256
#define BLIS_DEFAULT_NC_Z 2048
#define BLIS_DEFAULT_3M_MC_C BLIS_DEFAULT_MC_S
#define BLIS_DEFAULT_3M_KC_C BLIS_DEFAULT_KC_S
/*
#define BLIS_DEFAULT_MC_C 120
#define BLIS_DEFAULT_KC_C 240
#define BLIS_DEFAULT_NC_C 9600
#define BLIS_DEFAULT_MC_Z 120
#define BLIS_DEFAULT_KC_Z 240
#define BLIS_DEFAULT_NC_Z 9600
*/
// -- Register blocksizes --
#define BLIS_DEFAULT_MR_S 8
#define BLIS_DEFAULT_NR_S 4
#define BLIS_DEFAULT_MR_S 30
#define BLIS_DEFAULT_NR_S 16
#define BLIS_DEFAULT_MR_D 30
#define BLIS_DEFAULT_NR_D 8
#define BLIS_DEFAULT_MR_C 8
#define BLIS_DEFAULT_NR_C 4
//#define BLIS_DEFAULT_MR_C 8
//#define BLIS_DEFAULT_NR_C 4
#define BLIS_DEFAULT_MR_Z 8
#define BLIS_DEFAULT_NR_Z 4
//#define BLIS_DEFAULT_MR_Z 8
//#define BLIS_DEFAULT_NR_Z 4
// NOTE: If the micro-kernel, which is typically unrolled to a factor
// of f, handles leftover edge cases (ie: when k % f > 0) then these
......@@ -123,8 +130,8 @@
// leading dimensions used within the packed micro-panels are equal to
// or greater than their corresponding register blocksizes above.
//#define BLIS_EXTEND_MR_S 0
//#define BLIS_EXTEND_NR_S 0
#define BLIS_EXTEND_MR_S 2
#define BLIS_EXTEND_NR_S 0
#define BLIS_EXTEND_MR_D 2
#define BLIS_EXTEND_NR_D 0
......@@ -153,12 +160,11 @@
// -- gemm --
#define BLIS_DGEMM_UKERNEL bli_dgemm_opt_30x8
#define BLIS_SGEMM_UKERNEL bli_sgemm_opt_30x16
// -- trsm-related --
// -- LEVEL-1M KERNEL DEFINITIONS ----------------------------------------------
// -- packm --
......
......@@ -40,25 +40,6 @@
#define B_L1_PREFETCH_DIST 2
#define L2_PREFETCH_DIST 16 // Must be greater than 10, because of the way the loop is constructed.
/*
 * bli_sgemm_opt_30x8: single-precision gemm micro-kernel entry point.
 *
 * This function performs no arithmetic of its own. It forwards every
 * argument, unchanged and in the same order, to BLIS_SGEMM_UKERNEL_REF.
 *
 * NOTE(review): the parameter meanings below are inferred from the names
 * and the usual gemm micro-kernel convention; confirm against the
 * definition of BLIS_SGEMM_UKERNEL_REF.
 *   k          - presumably the shared (inner) dimension of the product
 *   alpha      - presumably the scalar applied to the A*B product
 *   a, b       - presumably the packed micro-panels of A and B
 *   beta       - presumably the scalar applied to the existing C
 *   c          - presumably the output tile
 *   rs_c, cs_c - presumably the row and column strides of c
 *   data       - auxiliary info passed through untouched
 *
 * NOTE(review): the name says 30x8, but the single-precision settings
 * visible alongside this function include MR_S = 30 / NR_S = 16 and
 * BLIS_SGEMM_UKERNEL = bli_sgemm_opt_30x16. Confirm whether this symbol
 * is still referenced anywhere.
 */
void bli_sgemm_opt_30x8(
dim_t k,
float* restrict alpha,
float* restrict a,
float* restrict b,
float* restrict beta,
float* restrict c, inc_t rs_c, inc_t cs_c,
auxinfo_t* data
)
{
/* Delegate to the reference implementation; arguments are passed as-is. */
BLIS_SGEMM_UKERNEL_REF( k,
alpha,
a,
b,
beta,
c, rs_c, cs_c,
data );
}
//Alternate code path used if C is not row-major
#define UPDATE_C_ROW_SCATTERED(REG1, NUM, BASE_DEST) \
{ \
......@@ -268,7 +249,7 @@ void bli_sgemm_opt_30x8(
}
//This is an array used for the scatter/gather instructions.
int offsets[16] __attribute__((aligned(0x1000))) = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15};
extern int offsets[16];
//#define MONITORS
......
This diff is collapsed.
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment