Skip to content
Snippets Groups Projects
Commit cf2768ff authored by Dirk Pleiter's avatar Dirk Pleiter
Browse files

Update

parent 0cf837d5
Branches
Tags
No related merge requests found
Showing
with 0 additions and 823 deletions
10
35
-19
-46
-2
-50
8
42
-47
23
11
40
45
50
8
-49
-6
0
-32
48
39
26
14
-25
-47
-22
48
-19
5
-40
22
-38
-9
-11
7
16
-10
-26
-29
-17
-23
15
25
-42
-23
-30
44
-28
-46
19
-29
-45
-24
29
36
-18
-25
26
-12
42
-35
-11
10
-40
-43
5
19
-35
6
-31
-3
2
-15
-9
-17
13
0
45
35
35
29
28
-15
-27
24
-39
-27
-37
-26
-23
19
-32
27
-27
49
2
20
-15
-9
-35
-34
23
-20
26
-10
26
-6
31
-32
33
-34
11
7
-29
-20
-43
-15
44
2
-19
28
-17
0
13
-14
9
-28
43
26
-10
4
41
-21
-44
-11
29
-33
2
-3
0
-1
-14
-31
38
-14
-6
43
-3
-33
46
9
14
26
35
41
13
8
-7
-46
-44
40
17
26
19
48
-24
12
-36
-25
-46
-2
-15
-18
-17
-2
-46
31
-39
-39
5
-40
27
-25
10
17
-6
-44
-47
26
-40
-19
41
-15
22
14
15
-19
24
-14
2
45
-34
37
-26
49
-41
-31
1
-28
3
14
-44
18
-7
-45
-38
28
1
49
-40
31
-24
-42
15
-6
-43
-33
-38
-49
-2
11
7
49
28
39
-5
25
-3
-1
37
-34
-20
-1
27
31
-19
-33
41
8
39
-42
29
-21
16
19
-25
16
19
-3
-11
15
-1
6
36
5
11
-12
-41
-41
2
12
10
0
14
21
39
19
48
20
38
-28
-4
30
-12
36
33
-2
16
9
-4
-48
10
-38
-7
28
14
10
-24
-39
-42
-19
File moved
/***********************
Conway Game of Life
serial version
************************/
#include <stdio.h>
#include <stdlib.h>
#define NI 200 /* array sizes */
#define NJ 200
#define NSTEPS 500 /* number of time steps */
/*
 * Conway's Game of Life, serial version.
 *
 * Fills an NI x NJ grid with random 0/1 cells, evolves it for NSTEPS
 * generations with periodic boundary conditions, and prints the number of
 * live cells in the final generation.
 *
 * Each grid carries one extra row/column of ghost cells on every side.
 * Before every step the ghost cells of `old` are refreshed from the
 * opposite edge, so the neighbour sum needs no special cases at the border.
 *
 * Returns 0 on success, 1 if a memory allocation fails.
 */
int main(int argc, char *argv[])
{
    int i, j, n, im, ip, jm, jp, ni, nj, nsum, isum;
    int **old, **new;
    float x;

    /* allocate arrays */
    ni = NI + 2; /* add 2 for left and right ghost cells */
    nj = NJ + 2;
    old = malloc(ni*sizeof(int*));
    new = malloc(ni*sizeof(int*));
    if (old == NULL || new == NULL) {
        fprintf(stderr, "life: out of memory\n");
        free(old);
        free(new);
        return 1;
    }
    for (i = 0; i < ni; i++) {
        old[i] = malloc(nj*sizeof(int));
        new[i] = malloc(nj*sizeof(int));
        if (old[i] == NULL || new[i] == NULL) {
            fprintf(stderr, "life: out of memory\n");
            /* release every row allocated so far (free(NULL) is a no-op) */
            for (j = 0; j <= i; j++) {
                free(old[j]);
                free(new[j]);
            }
            free(old);
            free(new);
            return 1;
        }
    }

    /* initialize interior elements of old to 0 or 1 */
    for (i = 1; i <= NI; i++) {
        for (j = 1; j <= NJ; j++) {
            x = rand()/((float)RAND_MAX + 1);
            if (x < 0.5) {
                old[i][j] = 0;
            } else {
                old[i][j] = 1;
            }
        }
    }

    /* time steps */
    for (n = 0; n < NSTEPS; n++) {
        /* corner boundary conditions */
        old[0][0]       = old[NI][NJ];
        old[0][NJ+1]    = old[NI][1];
        old[NI+1][NJ+1] = old[1][1];
        old[NI+1][0]    = old[1][NJ];

        /* left-right boundary conditions */
        for (i = 1; i <= NI; i++) {
            old[i][0]    = old[i][NJ];
            old[i][NJ+1] = old[i][1];
        }

        /* top-bottom boundary conditions */
        for (j = 1; j <= NJ; j++) {
            old[0][j]    = old[NI][j];
            old[NI+1][j] = old[1][j];
        }

        /* apply the rules to every interior cell */
        for (i = 1; i <= NI; i++) {
            for (j = 1; j <= NJ; j++) {
                im = i-1;
                ip = i+1;
                jm = j-1;
                jp = j+1;

                /* number of live cells among the eight neighbours */
                nsum = old[im][jp] + old[i][jp] + old[ip][jp]
                     + old[im][j ]              + old[ip][j ]
                     + old[im][jm] + old[i][jm] + old[ip][jm];

                switch (nsum) {
                case 3:             /* exactly three neighbours: alive */
                    new[i][j] = 1;
                    break;
                case 2:             /* exactly two: state unchanged */
                    new[i][j] = old[i][j];
                    break;
                default:            /* anything else: dead */
                    new[i][j] = 0;
                }
            }
        }

        /* copy new state into old state */
        for (i = 1; i <= NI; i++) {
            for (j = 1; j <= NJ; j++) {
                old[i][j] = new[i][j];
            }
        }
    }

    /* Iterations are done; sum the number of live cells.  The interior of
       old equals the interior of new after the copy above, and old is also
       well defined when NSTEPS is 0. */
    isum = 0;
    for (i = 1; i <= NI; i++) {
        for (j = 1; j <= NJ; j++) {
            isum = isum + old[i][j];
        }
    }
    printf("\nNumber of live cells = %d\n", isum);

    /* release the grids */
    for (i = 0; i < ni; i++) {
        free(old[i]);
        free(new[i]);
    }
    free(old);
    free(new);

    return 0;
}
!----------------------
! Conway Game of Life
! serial version
!----------------------
program life
  ! Conway's Game of Life on a periodic ni x nj grid, serial version.
  !
  ! The grid is filled with random 0/1 cells, evolved for nsteps
  ! generations, and the number of live cells in the final generation is
  ! printed.  Both arrays have one layer of ghost cells on every side
  ! (indices 0 and ni+1 / nj+1); the ghost cells of old are refreshed from
  ! the opposite edge before each step to implement the periodic boundary.
  implicit none

  integer, parameter :: ni=200, nj=200, nsteps = 500
  integer :: i, j, n, im, ip, jm, jp, nsum, isum
  integer, allocatable, dimension(:,:) :: old, new
  real :: arand

  ! allocate arrays, including room for ghost cells
  allocate(old(0:ni+1,0:nj+1), new(0:ni+1,0:nj+1))

  ! initialize interior elements of old to 0 or 1
  do j = 1, nj
    do i = 1, ni
      call random_number(arand)
      old(i,j) = nint(arand)
    enddo
  enddo

  ! iterate
  time_iteration: do n = 1, nsteps

    ! corner boundary conditions
    old(0,0)       = old(ni,nj)
    old(0,nj+1)    = old(ni,1)
    old(ni+1,nj+1) = old(1,1)
    old(ni+1,0)    = old(1,nj)

    ! left-right boundary conditions
    old(1:ni,0)    = old(1:ni,nj)
    old(1:ni,nj+1) = old(1:ni,1)

    ! top-bottom boundary conditions
    old(0,1:nj)    = old(ni,1:nj)
    old(ni+1,1:nj) = old(1,1:nj)

    ! apply the rules to every interior cell
    do j = 1, nj
      do i = 1, ni
        im = i - 1
        ip = i + 1
        jm = j - 1
        jp = j + 1

        ! number of live cells among the eight neighbours
        nsum = old(im,jp) + old(i,jp) + old(ip,jp) &
             + old(im,j )             + old(ip,j ) &
             + old(im,jm) + old(i,jm) + old(ip,jm)

        select case (nsum)
        case (3)          ! exactly three neighbours: alive
          new(i,j) = 1
        case (2)          ! exactly two: state unchanged
          new(i,j) = old(i,j)
        case default      ! anything else: dead
          new(i,j) = 0
        end select
      enddo
    enddo

    ! copy new state into old state; only the interior of new is ever
    ! assigned, so its (undefined) ghost cells must not be copied
    old(1:ni,1:nj) = new(1:ni,1:nj)
  enddo time_iteration

  ! Iterations are done; sum the number of live cells.  The interior of old
  ! equals the interior of new here and is also defined when nsteps is 0.
  isum = sum(old(1:ni,1:nj))

  ! Print final number of live cells.
  write(*,*)"Number of live cells = ",isum

  deallocate(old, new)
end program life
File moved
File moved
File moved
File moved
File moved
File moved
File moved
File moved
File moved
File moved
#include <stdio.h>
/*
 * Linear search, serial version.
 *
 * File b.data has the target value on the first line; the remaining 300
 * lines hold the values of the b array.  The 1-based index of every
 * element equal to the target is written to found.data, one per line.
 *
 * Returns 0 on success, 1 if a file cannot be opened, the input is
 * malformed or too short, or the output cannot be flushed.
 */
int main (int argc, char *argv[])
{
    enum { N = 300 };   /* true compile-time constant: b is not a VLA */
    int i, target;
    int b[N];
    FILE *infile, *outfile;

    infile = fopen("b.data", "r");
    if (infile == NULL) {
        perror("b.data");
        return 1;
    }
    outfile = fopen("found.data", "w");
    if (outfile == NULL) {
        perror("found.data");
        fclose(infile);
        return 1;
    }

    /* read in target */
    if (fscanf(infile, "%d", &target) != 1) {
        fprintf(stderr, "b.data: could not read the target value\n");
        fclose(infile);
        fclose(outfile);
        return 1;
    }

    /* read in b array */
    for (i = 0; i < N; i++) {
        if (fscanf(infile, "%d", &b[i]) != 1) {
            fprintf(stderr, "b.data: could not read array element %d\n", i + 1);
            fclose(infile);
            fclose(outfile);
            return 1;
        }
    }
    fclose(infile);

    /* Search the b array and output the target locations */
    for (i = 0; i < N; i++) {
        if (b[i] == target) {
            fprintf(outfile, "%d\n", i + 1);
        }
    }

    /* fclose flushes buffered output; a failure here means lost results */
    if (fclose(outfile) != 0) {
        perror("found.data");
        return 1;
    }
    return 0;
}
program search
  ! Linear search, serial version.
  !
  ! File b.data has the target value on the first line; the remaining 300
  ! lines hold the values of the b array.  The index of every element of b
  ! equal to the target is written to found.data, one per line.
  ! The program stops with code 1 if a file cannot be opened or read.
  implicit none

  integer, parameter :: N=300
  integer, parameter :: in_unit = 10, out_unit = 11
  integer :: i, target       ! local variables
  integer :: ios             ! I/O status of the last open/read
  integer :: b(N)            ! the entire array of integers

  ! status="old": a missing input file is an error, not an empty new file
  open(unit=in_unit, file="b.data", status="old", action="read", iostat=ios)
  if (ios /= 0) then
    write(*,*) "search: cannot open b.data"
    stop 1
  end if

  ! File found.data will contain the indices of b where the target is
  open(unit=out_unit, file="found.data", status="replace", action="write", &
       iostat=ios)
  if (ios /= 0) then
    write(*,*) "search: cannot open found.data"
    close(in_unit)
    stop 1
  end if

  ! Read in the target
  read(in_unit, *, iostat=ios) target
  if (ios /= 0) then
    write(*,*) "search: cannot read the target value from b.data"
    close(in_unit)
    close(out_unit)
    stop 1
  end if

  ! Read in b array
  do i = 1, N
    read(in_unit, *, iostat=ios) b(i)
    if (ios /= 0) then
      write(*,*) "search: cannot read element ", i, " of b from b.data"
      close(in_unit)
      close(out_unit)
      stop 1
    end if
  end do
  close(in_unit)

  ! Search the b array and output the target locations
  do i = 1, N
    if (b(i) == target) then
      write(out_unit,*) i
    end if
  end do
  close(out_unit)
end program search
#include <stdio.h>
#include <stdlib.h>
void srandom (unsigned seed);
double dboard (int darts);
#define DARTS 50000 /* number of throws at dartboard */
#define ROUNDS 10 /* number of times "darts" is iterated */
#define MASTER 0 /* task ID of master task */
/*
 * Serial Monte Carlo estimate of pi.
 *
 * Runs ROUNDS rounds of DARTS dart throws each (see dboard) and, after
 * every round, prints the running average of the per-round estimates
 * together with the total number of darts thrown so far.
 */
int main (int argc, char *argv[])
{
    double pi,      /* estimate of pi from the current round */
           avepi;   /* running average of pi over all rounds so far */
    int i;

    srandom (0);    /* fixed seed, so every run prints the same sequence */
    avepi = 0;
    for (i = 0; i < ROUNDS; i++) {
        pi = dboard(DARTS);
        /* incremental mean: fold this round into the average of the
           previous i rounds */
        avepi = ((avepi * i) + pi)/(i + 1);
        printf(" After %8d throws, average value of pi = %10.8f\n",
               (DARTS * (i + 1)),avepi);
    }
    return 0;
}
/******************************************************************************
* FILE: dboard.c
* DESCRIPTION:
* Used in pi calculation example codes.
* See mpi_pi_send.c and mpi_pi_reduce.c
* Throw darts at board. Done by generating random numbers
* between 0 and 1 and converting them to values for x and y
* coordinates and then testing to see if they "land" in
* the circle." If so, score is incremented. After throwing the
* specified number of darts, pi is calculated. The computed value
* of pi is returned as the value of this function, dboard.
* Note: the seed value for rand() is set in pi_send.f or pi_reduce.f.
* AUTHOR: unknown
* LAST REVISED: 04/14/05 Blaise Barney
****************************************************************************/
/*
Explanation of constants and variables used in this function:
darts = number of throws at dartboard
score = number of darts that hit circle
n = index variable
r = random number between 0 and 1
x_coord = x coordinate, between -1 and 1
x_sqr = square of x coordinate
y_coord = y coordinate, between -1 and 1
y_sqr = square of y coordinate
pi = computed value of pi
*/
#include <stdio.h>
#include <stdlib.h>
#define sqr(x) ((x)*(x))
long random(void);

/*
 * Estimate pi by throwing `darts` random darts at the square [-1,1]x[-1,1]
 * and counting how many land inside the unit circle.
 *
 *   darts   number of throws; must be positive
 *   returns 4 * hits / darts, the Monte Carlo estimate of pi
 *
 * Random numbers come from random(); seeding it is left to the caller.
 * Exits with status 1 if unsigned int is not 4 bytes wide.
 */
double dboard(int darts)
{
    double x_coord, y_coord, pi, r;
    int score, n;
    unsigned int cconst; /* must be 4-bytes in size */

    /*************************************************************************
     * The cconst variable must be 4 bytes. We check this and bail if it is
     * not the right size
     ************************************************************************/
    if (sizeof(cconst) != 4) {
        printf("Wrong data size for cconst variable in dboard routine!\n");
        printf("See comments in source file. Quitting.\n");
        exit(1);
    }

    /* 2^31, one more than the largest value random() returns, so r below
       lies in [0,1).  The shift is done on an unsigned operand: shifting a
       signed int into the sign bit (2 << 30) is undefined behaviour. */
    cconst = 1u << 31;
    score = 0;

    /* "throw darts at board" */
    for (n = 1; n <= darts; n++) {
        /* generate random numbers for x and y coordinates */
        r = (double)random()/cconst;
        x_coord = (2.0 * r) - 1.0;
        r = (double)random()/cconst;
        y_coord = (2.0 * r) - 1.0;

        /* if dart lands in circle, increment score */
        if ((sqr(x_coord) + sqr(y_coord)) <= 1.0)
            score++;
    }

    /* calculate pi: the hit fraction approximates (circle area)/(square
       area) = pi/4 */
    pi = 4.0 * (double)score/(double)darts;
    return(pi);
}
program pi
  ! Serial Monte Carlo estimate of pi.
  !
  ! Runs ROUNDS rounds of DARTS dart throws each and, after every round,
  ! prints the running average of the per-round estimates together with
  ! the total number of darts thrown so far.
  implicit none

  integer, parameter :: dp = selected_real_kind(15, 307)  ! double precision
  integer, parameter :: DARTS = 50000, ROUNDS = 10
  real(dp) :: pi_est   ! estimate of pi from the current round
  real(dp) :: avepi    ! running average over all rounds so far
  integer :: rank
  integer :: i, n
  integer, allocatable :: seed(:)

  ! we set it to zero in the sequential run
  rank = 0

  ! initialize the random number generator; the seed is derived from rank
  ! so that it would differ between tasks
  call random_seed()
  call random_seed(size = n)
  allocate(seed(n))
  seed = 12 + rank*11
  call random_seed(put=seed(1:n))
  deallocate(seed)

  avepi = 0
  do i = 0, ROUNDS-1
    pi_est = dboard(DARTS)
    ! incremental mean: fold this round into the average of the previous
    ! i rounds
    avepi = ((avepi*i) + pi_est)/(i + 1)
    print *, "After ", DARTS*(i+1), " throws, average value of pi =", avepi
  end do

contains

  ! Throw `darts` random darts at the unit square [0,1)x[0,1) and return
  ! 4 * (fraction with x**2 + y**2 <= 1), i.e. an estimate of pi from the
  ! quarter circle.
  real(dp) function dboard(darts)
    integer, intent(in) :: darts   ! number of throws; must be positive
    real(dp) :: x_coord, y_coord
    integer :: score, n

    score = 0
    do n = 1, darts
      call random_number(x_coord)
      call random_number(y_coord)
      if ((x_coord**2 + y_coord**2) <= 1.0_dp) then
        score = score + 1
      end if
    end do
    dboard = 4.0_dp*score/darts
  end function dboard

end program pi
62
183
271
291
296
# Overview
In this lab you will become more familiar with advanced MPI topics, including one-sided communication and MPI I/O.
### Goals
Gain experience with MPI one-sided communication, MPI I/O, and MPI topologies.
### Duration
Three hours
# Source Codes
- MPI I/O. Serial hello world in C and Fortran ([hello_mpi.c](hello_mpi.c) and [hello_mpi.f90](hello_mpi.f90))
- MPI Derived types and I/O. Serial STL file reader in C and Fortran ([mpi_derived_types.c](mpi_derived_types.c) and [mpi_derived_types.f90](mpi_derived_types.f90))
- MPI Latency: C and Fortran ([mpi_latency.c](mpi_latency.c) and [mpi_latency.f90](mpi_latency.f90))
- MPI Bandwidth : C and Fortran ([mpi_bandwidth.c](mpi_bandwidth.c) and [mpi_bandwidth.f90](mpi_bandwidth.f90))
- MPI Bandwidth Non-Blocking: C and Fortran ([mpi_bandwidth-nonblock.c](mpi_bandwidth-nonblock.c)
and [mpi_bandwidth-nonblock.f90](mpi_bandwidth-nonblock.f90))
# Preparation
In preparation for this lab, read the [general instructions](../README.md) which will help you get going on Beskow.
# Exercise 1 - MPI I/O
MPI I/O is used so that results can be written to the same file in parallel. Take the serial hello world programs and modify them so that instead of writing the output to screen the output is written to a file using MPI I/O.
The simplest solution is likely to be for you to create a character buffer, and then use the MPI_File_write_at function.
# Exercise 2 - MPI I/O and derived types
Take the serial stl reader and modify it such that the stl file is read (and written) in parallel using collective MPI I/O. Use derived types such that the file can be read/written with a maximum of 3 I/O operations per read and write.
The simplest solution is probably to create a derived type for each triangle, and then use the MPI_File_XXXX_at_all functions. A correct solution will have the same MD5 hash for both stl models (input and output), unless the order of the triangles has been changed.
```
md5sum out.stl data/sphere.stl
822aba6dc20cc0421f92ad50df95464c out.stl
822aba6dc20cc0421f92ad50df95464c data/sphere.stl
```
# Exercise 3 - Bandwidth and latency between nodes
Use `mpi_wtime` to compute latency and bandwidth with the bandwidth and latency codes above
**Note**: In modifying the original exercises provided by LLNL, we had to make a small change to the latency code, as the Cray latency is a lot better than the tests were designed for. When the latency is of the order of 1 millisecond, writing it out as an integer number of milliseconds does not make much sense.
For this exercise, it is nice to compare running on the same node e.g.
```
salloc -N 1 --ntasks-per-node=2 -A <project> -t 00:05:00
srun -n 2 ./mpi_latency.x
```
with running on separate nodes
```
salloc -N 2 --ntasks-per-node=1 -A <project> -t 00:05:00
srun -n 2 ./mpi_latency.x
```
Similarly for the bandwidth.
As you would expect, the latency is much better on a single node than across nodes. Possibly unexpectedly, however, if you have just 2 MPI tasks the bandwidth is better between nodes than within a single node (probably related to the lack of contention for resources, e.g. the Gemini chips and the L3 cache).
# Solutions
The solutions will be made available at the end of the lab.
# Acknowledgment
The examples in this lab are provided for educational purposes by
[National Center for Supercomputing Applications](http://www.ncsa.illinois.edu/),
(in particular their [Cyberinfrastructure Tutor](http://www.citutor.org/)),
[Lawrence Livermore National Laboratory](https://computing.llnl.gov/) and
[Argonne National Laboratory](http://www.mcs.anl.gov/). Much of the LLNL MPI material comes from the
[Cornell Theory Center](http://www.cac.cornell.edu/).
We would like to thank them for allowing us to develop the material for machines at PDC.
You might find other useful educational materials at these sites.
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please to comment