Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
M
miniFE-delme
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Wiki
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Container registry
Model registry
Operate
Environments
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
EPI-WP1-Public
miniFE-delme
Commits
600625d2
Commit
600625d2
authored
5 years ago
by
Romain Dolbeau
Browse files
Options
Downloads
Patches
Plain Diff
Implements LMUL=2 using wide-add to expand the indices; not yet supported by vehave so untested.
parent
3ced7971
Branches
sliced_ellpack_rvv
No related tags found
No related merge requests found
Changes
3
Show whitespace changes
Inline
Side-by-side
Showing
3 changed files
openmp-rvv/src/Makefile.clang.openmp
+1
-1
1 addition, 1 deletion
openmp-rvv/src/Makefile.clang.openmp
openmp-rvv/src/SELLMatrix.hpp
+4
-0
4 additions, 0 deletions
openmp-rvv/src/SELLMatrix.hpp
openmp-rvv/src/SparseMatrix_functions.hpp
+23
-0
23 additions, 0 deletions
openmp-rvv/src/SparseMatrix_functions.hpp
with
28 additions
and
1 deletion
openmp-rvv/src/Makefile.clang.openmp
+
1
−
1
View file @
600625d2
...
...
@@ -18,7 +18,7 @@ MINIFE_MATRIX_TYPE = -DMINIFE_SELL_MATRIX
#-----------------------------------------------------------------------
CFLAGS
=
-O2
-mepi
-fno-vectorize
-fopenmp
#-static
CFLAGS
=
-O2
-mepi
-fno-vectorize
-fopenmp
#-DWIDE_LMUL
#-static
CXXFLAGS
=
$(
CFLAGS
)
LLVMDIR
=
/opt/llvm-EPI-development-toolchain-native
...
...
This diff is collapsed.
Click to expand it.
openmp-rvv/src/SELLMatrix.hpp
+
4
−
0
View file @
600625d2
...
...
@@ -96,7 +96,11 @@ SELLMatrix {
num_cols_per_row
=
ncols_per_row
;
// make size of the block equal to a reasonable RVV size
#ifndef WIDE_LMUL
num_rows_per_block
=
__builtin_epi_vsetvl
(
8
,
__epi_e64
,
__epi_m1
);
// FIXME: heuristic instead of 8?
#else
num_rows_per_block
=
__builtin_epi_vsetvl
(
8
,
__epi_e64
,
__epi_m2
);
// FIXME: heuristic instead of 8?
#endif
num_blocks
=
(
nrows
+
num_rows_per_block
-
1
)
/
num_rows_per_block
;
}
...
...
This diff is collapsed.
Click to expand it.
openmp-rvv/src/SparseMatrix_functions.hpp
+
23
−
0
View file @
600625d2
...
...
@@ -675,7 +675,9 @@ void operator()(MatrixType& A,
int
num_blocks
=
A
.
num_blocks
;
int
num_rows_per_block
=
A
.
num_rows_per_block
;
#pragma omp parallel for
for
(
int
block_id
=
0
;
block_id
<
num_blocks
;
block_id
++
)
{
#ifndef WIDE_LMUL
__epi_1xf64
sum
=
__builtin_epi_vfmv_v_f_1xf64
(
0.0
,
num_rows_per_block
);
int
block_offset
=
block_id
*
num_rows_per_block
*
row_len
;
int
stride
=
num_rows_per_block
;
...
...
@@ -696,6 +698,27 @@ void operator()(MatrixType& A,
block_offset
+=
stride
;
}
__builtin_epi_vstore_1xf64
(
&
ycoefs
[
block_id
*
num_rows_per_block
],
sum
,
stride
);
#else
__epi_2xf64
sum
=
__builtin_epi_vfmv_v_f_2xf64
(
0.0
,
num_rows_per_block
);
int
block_offset
=
block_id
*
num_rows_per_block
*
row_len
;
int
stride
=
num_rows_per_block
;
if
(
block_id
==
num_blocks
-
1
&&
n
%
num_rows_per_block
!=
0
){
stride
=
n
%
num_rows_per_block
;
}
/* SVE uses a mask here, we can just alter VL, any VL <= num_rows_per_block (from vsetvli) should be legit */
for
(
int
i
=
0
;
i
<
row_len
;
i
++
){
__epi_2xf64
acofs
=
__builtin_epi_vload_2xf64
(
&
Acoefs
[
block_offset
],
stride
);
__epi_2xi32
indices32
=
__builtin_epi_vload_2xi32
(
&
Acols
[
block_offset
],
stride
);
// in element
__epi_2xi64
indices
=
__builtin_epi_vwadd_2xi64
(
indices32
,
indices32
,
stride
);
// in 2*element, but we need sizeof(double)*element
indices
=
__builtin_epi_vsll_2xi64
(
indices
,
__builtin_epi_vmv_v_x_2xi64
(
2
,
stride
),
stride
);
// times 4, giving 8*element == sizeof(double)*element
__epi_2xf64
xcofs
=
__builtin_epi_vload_indexed_2xf64
(
&
xcoefs
[
0
],
indices
,
stride
);
// ... indices are byte offsets, not element indices, in EPI/V
sum
=
__builtin_epi_vfmacc_2xf64
(
sum
,
acofs
,
xcofs
,
stride
);
block_offset
+=
stride
;
}
__builtin_epi_vstore_2xf64
(
&
ycoefs
[
block_id
*
num_rows_per_block
],
sum
,
stride
);
#endif
}
...
...
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment