Update

cf2768ff · Dirk Pleiter · 0cf837d5 · 0cf837d5 · cf2768ff · 0cf837d5
Commit cf2768ff authored Aug 16, 2021 by Dirk Pleiter
--- a/lab2/b.data
+++ b/lab2/b.data
- 10
- 35
- -19
- -46
- -2
- -50
- 8
- 42
- -47
- 23
- 11
- 40
- 45
- 50
- 8
- -49
- -6
- 0
- -32
- 48
- 39
- 26
- 14
- -25
- -47
- -22
- 48
- -19
- 5
- -40
- 22
- -38
- -9
- -11
- 7
- 16
- -10
- -26
- -29
- -17
- -23
- 15
- 25
- -42
- -23
- -30
- 44
- -28
- -46
- 19
- -29
- -45
- -24
- 29
- 36
- -18
- -25
- 26
- -12
- 42
- -35
- -11
- 10
- -40
- -43
- 5
- 19
- -35
- 6
- -31
- -3
- 2
- -15
- -9
- -17
- 13
- 0
- 45
- 35
- 35
- 29
- 28
- -15
- -27
- 24
- -39
- -27
- -37
- -26
- -23
- 19
- -32
- 27
- -27
- 49
- 2
- 20
- -15
- -9
- -35
- -34
- 23
- -20
- 26
- -10
- 26
- -6
- 31
- -32
- 33
- -34
- 11
- 7
- -29
- -20
- -43
- -15
- 44
- 2
- -19
- 28
- -17
- 0
- 13
- -14
- 9
- -28
- 43
- 26
- -10
- 4
- 41
- -21
- -44
- -11
- 29
- -33
- 2
- -3
- 0
- -1
- -14
- -31
- 38
- -14
- -6
- 43
- -3
- -33
- 46
- 9
- 14
- 26
- 35
- 41
- 13
- 8
- -7
- -46
- -44
- 40
- 17
- 26
- 19
- 48
- -24
- 12
- -36
- -25
- -46
- -2
- -15
- -18
- -17
- -2
- -46
- 31
- -39
- -39
- 5
- -40
- 27
- -25
- 10
- 17
- -6
- -44
- -47
- 26
- -40
- -19
- 41
- -15
- 22
- 14
- 15
- -19
- 24
- -14
- 2
- 45
- -34
- 37
- -26
- 49
- -41
- -31
- 1
- -28
- 3
- 14
- -44
- 18
- -7
- -45
- -38
- 28
- 1
- 49
- -40
- 31
- -24
- -42
- 15
- -6
- -43
- -33
- -38
- -49
- -2
- 11
- 7
- 49
- 28
- 39
- -5
- 25
- -3
- -1
- 37
- -34
- -20
- -1
- 27
- 31
- -19
- -33
- 41
- 8
- 39
- -42
- 29
- -21
- 16
- 19
- -25
- 16
- 19
- -3
- -11
- 15
- -1
- 6
- 36
- 5
- 11
- -12
- -41
- -41
- 2
- 12
- 10
- 0
- 14
- 21
- 39
- 19
- 48
- 20
- 38
- -28
- -4
- 30
- -12
- 36
- 33
- -2
- 16
- 9
- -4
- -48
- 10
- -38
- -7
- 28
- 14
- 10
- -24
- -39
- -42
- -19
--- a/lab3/data/sphere.stl
+++ b/lab3/data/sphere.stl
--- a/lab2/game_of_life-serial.c
+++ b/lab2/game_of_life-serial.c
-/***********************
-
-Conway Game of Life
-
-serial version
-
-************************/
-
-#include <stdio.h>
-
-#include <stdlib.h>
-
-#define NI 200        /* array sizes */
-
-#define NJ 200
-
-#define NSTEPS 500    /* number of time steps */
-
-/*
- * Conway's Game of Life on an NI x NJ grid with periodic (wrap-around)
- * boundaries, advanced for NSTEPS generations.
- *
- * Both grids carry one extra ghost row/column on every side so that the
- * neighbour sum needs no special cases at the edges.  The initial state is
- * drawn from rand(), which is never seeded here.
- *
- * Prints the number of live cells after the last generation.
- * Returns 0 on success, 1 if memory could not be allocated.
- */
-int main(int argc, char *argv[])
-{
-  int i, j, n, im, ip, jm, jp, ni, nj, nsum, isum;
-  int **old, **new;
-  float x;
-
-  /* allocate arrays */
-
-  ni = NI + 2;  /* add 2 for left and right ghost cells */
-  nj = NJ + 2;
-  old = malloc(ni*sizeof(int*));
-  new = malloc(ni*sizeof(int*));
-  if(old == NULL || new == NULL){
-    fprintf(stderr, "Out of memory while allocating row pointers\n");
-    return 1;
-  }
-
-  for(i=0; i<ni; i++){
-    old[i] = malloc(nj*sizeof(int));
-    new[i] = malloc(nj*sizeof(int));
-    if(old[i] == NULL || new[i] == NULL){
-      fprintf(stderr, "Out of memory while allocating row %d\n", i);
-      return 1;
-    }
-  }
-
-  /* initialize elements of old to 0 or 1 */
-
-  for(i=1; i<=NI; i++){
-    for(j=1; j<=NJ; j++){
-      x = rand()/((float)RAND_MAX + 1);
-      if(x<0.5){
-        old[i][j] = 0;
-      } else {
-        old[i][j] = 1;
-      }
-    }
-  }
-
-  /*  time steps */
-  for(n=0; n<NSTEPS; n++){
-
-    /* corner boundary conditions: each ghost corner mirrors the
-       diagonally opposite interior corner */
-    old[0][0] = old[NI][NJ];
-    old[0][NJ+1] = old[NI][1];
-    old[NI+1][NJ+1] = old[1][1];
-    old[NI+1][0] = old[1][NJ];
-
-    /* left-right boundary conditions */
-
-    for(i=1; i<=NI; i++){
-      old[i][0] = old[i][NJ];
-      old[i][NJ+1] = old[i][1];
-    }
-
-    /* top-bottom boundary conditions */
-
-    for(j=1; j<=NJ; j++){
-      old[0][j] = old[NI][j];
-      old[NI+1][j] = old[1][j];
-    }
-
-    for(i=1; i<=NI; i++){
-      for(j=1; j<=NJ; j++){
-        im = i-1;
-        ip = i+1;
-        jm = j-1;
-        jp = j+1;
-
-        /* number of live cells among the eight neighbours */
-        nsum =  old[im][jp] + old[i][jp] + old[ip][jp]
-              + old[im][j ]              + old[ip][j ]
-              + old[im][jm] + old[i][jm] + old[ip][jm];
-
-        switch(nsum){
-
-        case 3:   /* exactly three neighbours: cell is alive */
-          new[i][j] = 1;
-          break;
-
-        case 2:   /* exactly two neighbours: cell keeps its state */
-          new[i][j] = old[i][j];
-          break;
-
-        default:  /* any other count: cell is dead */
-          new[i][j] = 0;
-        }
-      }
-    }
-
-    /* copy new state into old state */
-
-    for(i=1; i<=NI; i++){
-      for(j=1; j<=NJ; j++){
-        old[i][j] = new[i][j];
-      }
-    }
-  }
-
-  /*  Iterations are done; sum the number of live cells.
-      The interior of old equals new after the copy above, and old is
-      also defined when no time step was executed, so sum old. */
-  isum = 0;
-  for(i=1; i<=NI; i++){
-    for(j=1; j<=NJ; j++){
-      isum = isum + old[i][j];
-    }
-  }
-  printf("\nNumber of live cells = %d\n", isum);
-
-  /* release the grids */
-  for(i=0; i<ni; i++){
-    free(old[i]);
-    free(new[i]);
-  }
-  free(old);
-  free(new);
-
-  return 0;
-}
--- a/lab2/game_of_life-serial.f90
+++ b/lab2/game_of_life-serial.f90
-!----------------------
-!  Conway Game of Life
-!    serial version
-!----------------------
-
-! Conway's Game of Life on an ni x nj grid with periodic boundaries,
-! advanced for nsteps generations.  The grids are allocated with one ghost
-! row/column on every side so the neighbour sum needs no edge special cases.
-! Prints the number of live cells after the last generation.
-program life
-
-  implicit none
-  integer, parameter :: ni=200, nj=200, nsteps = 500
-  integer :: i, j, n, im, ip, jm, jp, nsum, isum
-  integer, allocatable, dimension(:,:) :: old, new
-  real :: arand
-
-  ! allocate arrays, including room for ghost cells
-
-  allocate(old(0:ni+1,0:nj+1), new(0:ni+1,0:nj+1))
-
-  ! initialize elements of old to 0 or 1
-
-  do j = 1, nj
-     do i = 1, ni
-        call random_number(arand)
-        old(i,j) = nint(arand)
-     end do
-  end do
-
-  !  iterate
-
-  time_iteration: do n = 1, nsteps
-
-     ! corner boundary conditions: each ghost corner mirrors the
-     ! diagonally opposite interior corner
-
-     old(0,0) = old(ni,nj)
-     old(0,nj+1) = old(ni,1)
-     old(ni+1,nj+1) = old(1,1)
-     old(ni+1,0) = old(1,nj)
-
-     ! left-right boundary conditions
-
-     old(1:ni,0) = old(1:ni,nj)
-     old(1:ni,nj+1) = old(1:ni,1)
-
-     ! top-bottom boundary conditions
-
-     old(0,1:nj) = old(ni,1:nj)
-     old(ni+1,1:nj) = old(1,1:nj)
-
-     do j = 1, nj
-        do i = 1, ni
-
-           im = i - 1
-           ip = i + 1
-           jm = j - 1
-           jp = j + 1
-
-           ! number of live cells among the eight neighbours
-           nsum = old(im,jp) + old(i,jp) + old(ip,jp) &
-                + old(im,j )             + old(ip,j ) &
-                + old(im,jm) + old(i,jm) + old(ip,jm)
-
-           select case (nsum)
-           case (3)
-              ! exactly three neighbours: cell is alive
-              new(i,j) = 1
-           case (2)
-              ! exactly two neighbours: cell keeps its state
-              new(i,j) = old(i,j)
-           case default
-              ! any other count: cell is dead
-              new(i,j) = 0
-           end select
-
-        end do
-     end do
-
-     ! copy new state into old state; only the interior of new is ever
-     ! assigned, so the ghost cells of new must not be read
-
-     old(1:ni,1:nj) = new(1:ni,1:nj)
-
-  end do time_iteration
-
-  ! Iterations are done; sum the number of live cells.
-  ! The interior of old equals new after the copy above, and old is also
-  ! defined when no time step was executed.
-
-  isum = sum(old(1:ni,1:nj))
-
-  ! Print final number of live cells.
-
-  write(*,*)"Number of live cells = ",isum
-
-  deallocate(old, new)
-
-end program life
-
--- a/lab3/hello_mpi.c
+++ b/lab3/hello_mpi.c
--- a/lab3/hello_mpi.f90
+++ b/lab3/hello_mpi.f90
--- a/lab3/mpi_bandwidth-nonblock.c
+++ b/lab3/mpi_bandwidth-nonblock.c
--- a/lab3/mpi_bandwidth-nonblock.f90
+++ b/lab3/mpi_bandwidth-nonblock.f90
--- a/lab3/mpi_bandwidth.c
+++ b/lab3/mpi_bandwidth.c
--- a/lab3/mpi_bandwidth.f90
+++ b/lab3/mpi_bandwidth.f90
--- a/lab3/mpi_derived_types.c
+++ b/lab3/mpi_derived_types.c
--- a/lab3/mpi_derived_types.f90
+++ b/lab3/mpi_derived_types.f90
--- a/lab3/mpi_latency.c
+++ b/lab3/mpi_latency.c
--- a/lab3/mpi_latency.f90
+++ b/lab3/mpi_latency.f90
--- a/lab2/parallel_search-serial.c
+++ b/lab2/parallel_search-serial.c
-#include <stdio.h>
-
-/*
- * Serial search: read a target value followed by N integers from "b.data"
- * and write the 1-based position of every element equal to the target to
- * "found.data", one position per line.
- *
- * Returns 0 on success, 1 if a file cannot be opened or "b.data" does not
- * contain the expected N + 1 integers.
- */
-int main (int argc, char *argv[])
-{
-  enum { N = 300 };   /* compile-time constant, so b is an ordinary array */
-  int i,target;
-  int b[N];
-  FILE *infile,*outfile;
-
-  /* File b.data has the target value on the first line
-     The remaining 300 lines of b.data have the values for the b array */
-  infile = fopen("b.data","r" ) ;
-  if (infile == NULL) {
-    fprintf(stderr,"Cannot open b.data for reading\n");
-    return 1;
-  }
-
-  /* read in target */
-  if (fscanf(infile,"%d", &target) != 1) {
-    fprintf(stderr,"Cannot read the target value from b.data\n");
-    fclose(infile);
-    return 1;
-  }
-
-  /* read in b array */
-  for(i=0;i<N;i++) {
-    if (fscanf(infile,"%d", &b[i]) != 1) {
-      fprintf(stderr,"Cannot read value %d of the b array from b.data\n",i+1);
-      fclose(infile);
-      return 1;
-    }
-  }
-  fclose(infile);
-
-  /* Open the output only after the input was read successfully, so that a
-     bad input file does not truncate an existing found.data */
-  outfile = fopen("found.data","w") ;
-  if (outfile == NULL) {
-    fprintf(stderr,"Cannot open found.data for writing\n");
-    return 1;
-  }
-
-  /* Search the b array and output the target locations */
-
-  for(i=0;i<N;i++) {
-    if( b[i] == target) {
-      fprintf(outfile,"%d\n",i+1);
-    }
-  }
-  fclose(outfile);
-
-  return 0;
-}
--- a/lab2/parallel_search-serial.f90
+++ b/lab2/parallel_search-serial.f90
-! Serial search: read a target value followed by N integers from b.data and
-! write the (1-based) index of every element equal to the target to
-! found.data.  Stops with code 1 if a file cannot be opened or read.
-program search
-  implicit none
-  integer, parameter :: N=300
-  integer :: i, target  ! local variables
-  integer :: b(N)       ! the entire array of integers
-  integer :: ios        ! I/O status of the last open/read
-
-  ! File b.data has the target value on the first line
-  ! The remaining 300 lines of b.data have the values for the b array
-  open(unit=10,file="b.data",status="old",action="read",iostat=ios)
-  if (ios /= 0) then
-     write(*,*) "Cannot open b.data for reading"
-     stop 1
-  end if
-
-  ! Read in the target
-  read(10,*,iostat=ios) target
-  if (ios /= 0) then
-     write(*,*) "Cannot read the target value from b.data"
-     stop 1
-  end if
-
-  ! Read in b array
-
-  do i=1,N
-     read(10,*,iostat=ios) b(i)
-     if (ios /= 0) then
-        write(*,*) "Cannot read value ", i, " of the b array from b.data"
-        stop 1
-     end if
-  end do
-  close(10)
-
-  ! File found.data will contain the indices of b where the target is.
-  ! status="replace" discards any previous contents, so results of an
-  ! earlier run cannot survive when this run finds no match.
-  open(unit=11,file="found.data",status="replace",action="write",iostat=ios)
-  if (ios /= 0) then
-     write(*,*) "Cannot open found.data for writing"
-     stop 1
-  end if
-
-  ! Search the b array and output the target locations
-
-  do i=1,N
-     if (b(i) == target) then
-        write(11,*) i
-     end if
-  end do
-  close(11)
-
-end program search
--- a/lab2/pi_serial.c
+++ b/lab2/pi_serial.c
-#include <stdio.h>
-#include <stdlib.h>
-
-void srandom (unsigned seed);
-double dboard (int darts);
-#define DARTS 50000     /* number of throws at dartboard */
-#define ROUNDS 10       /* number of times "darts" is iterated */
-#define MASTER 0        /* task ID of master task */
-
-/*
- * Serial Monte Carlo estimate of pi.
- *
- * Seeds the generator with srandom(0), then calls dboard(DARTS) ROUNDS times
- * and prints the running average of the estimates after every round.
- * Always returns 0.
- */
-int main (int argc, char *argv[])
-{
-  double  pi,             /* estimate of pi from the current round */
-          avepi;          /* average pi value for all rounds so far */
-  int     i;
-
-  srandom (0);
-
-  avepi = 0;
-  for (i = 0; i < ROUNDS; i++) {
-    pi = dboard(DARTS);
-
-    /* Running average: avepi holds the mean of the first i estimates,
-       so fold the new one in with weight 1/(i + 1) */
-    avepi = ((avepi * i) + pi)/(i + 1);
-    printf("   After %8d throws, average value of pi = %10.8f\n",
-           (DARTS * (i + 1)),avepi);
-
-  }
-
-  return 0;
-}
-
-
-
-/******************************************************************************
-* FILE: dboard.c
-* DESCRIPTION:
-*   Used in pi calculation example codes. 
-*   See mpi_pi_send.c and mpi_pi_reduce.c  
-*   Throw darts at board.  Done by generating random numbers 
-*   between 0 and 1 and converting them to values for x and y 
-*   coordinates and then testing to see if they "land" in 
-*   the circle."  If so, score is incremented.  After throwing the 
-*   specified number of darts, pi is calculated.  The computed value 
-*   of pi is returned as the value of this function, dboard. 
-*   Note:  the seed value for random() is set by the caller via srandom().
-* AUTHOR: unknown
-* LAST REVISED: 04/14/05 Blaise Barney
-****************************************************************************/
-/*
-Explanation of constants and variables used in this function:
-  darts       = number of throws at dartboard
-  score       = number of darts that hit circle
-  n           = index variable
-  r           = random number between 0 and 1
-  x_coord     = x coordinate, between -1 and 1
-  x_sqr       = square of x coordinate
-  y_coord     = y coordinate, between -1 and 1
-  y_sqr       = square of y coordinate
-  pi          = computed value of pi
-*/
-
-
-#include <stdio.h>
-#include <stdlib.h>
-#define sqr(x) ((x)*(x))
-long random(void);
-
-/*
- * Estimate pi by throwing `darts` random darts at the square [-1,1] x [-1,1]
- * and counting how many land inside the unit circle.
- *
- *   darts   number of throws; expected to be positive
- *
- * Returns 4 * hits / darts.  Uses random(), so the result depends on the
- * generator state left by the caller.
- */
-double dboard(int darts)
-{
-  double x_coord, y_coord, pi, r;
-  int score, n;
-  unsigned int cconst;  /* must be 4-bytes in size */
-  /*************************************************************************
-   * The cconst variable must be 4 bytes. We check this and bail if it is
-   * not the right size
-   ************************************************************************/
-  if (sizeof(cconst) != 4) {
-    printf("Wrong data size for cconst variable in dboard routine!\n");
-    printf("See comments in source file. Quitting.\n");
-    exit(1);
-  }
-  /* 2^31, used to scale random() into [0, 1).  The operand is unsigned
-     because shifting a signed int into the sign bit is undefined. */
-  cconst = 2u << (31 - 1);
-  score = 0;
-
-  /* "throw darts at board" */
-  for (n = 1; n <= darts; n++) {
-    /* generate random numbers for x and y coordinates */
-    r = (double)random()/cconst;
-    x_coord = (2.0 * r) - 1.0;
-    r = (double)random()/cconst;
-    y_coord = (2.0 * r) - 1.0;
-
-    /* if dart lands in circle, increment score */
-    if ((sqr(x_coord) + sqr(y_coord)) <= 1.0)
-      score++;
-  }
-
-  /* calculate pi */
-  pi = 4.0 * (double)score/(double)darts;
-  return(pi);
-}
--- a/lab2/pi_serial.f90
+++ b/lab2/pi_serial.f90
-! Serial Monte Carlo estimate of pi.
-! Seeds the intrinsic random number generator, then calls dboard(DARTS)
-! ROUNDS times and prints the running average of the estimates.
-program pi
-
-implicit none
-
-integer, parameter :: dp = kind(1.0d0)
-integer, parameter :: DARTS = 50000, ROUNDS = 10
-
-real(dp) :: pi_est   ! estimate from the current round
-real(dp) :: avepi    ! average of all estimates so far
-integer :: rank
-integer :: i, n
-integer, allocatable :: seed(:)
-
-! we set it to zero in the sequential run
-rank = 0
-
-! initialize the random number generator
-! we make sure the seed is different for each task
-call random_seed()
-call random_seed(size = n)
-allocate(seed(n))
-seed = 12 + rank*11
-call random_seed(put=seed(1:n))
-deallocate(seed)
-
-avepi = 0
-do i = 0, ROUNDS-1
-   pi_est = dboard(DARTS)
-
-   ! calculate the average value of pi over all iterations:
-   ! avepi holds the mean of the first i estimates
-   avepi = ((avepi*i) + pi_est)/(i + 1)
-
-   print *, "After ", DARTS*(i+1), " throws, average value of pi =", avepi
-end do
-
-contains
-
-   ! Throw `darts` random points into the unit square [0,1) x [0,1) and
-   ! count those inside the quarter circle of radius 1.
-   ! Returns 4 * hits / darts as the estimate of pi.
-   function dboard(darts) result(estimate)
-
-      integer, intent(in) :: darts
-      real(dp) :: estimate
-
-      real(dp) :: x_coord, y_coord
-      integer :: score, n
-
-      score = 0
-      do n = 1, darts
-         call random_number(x_coord)
-         call random_number(y_coord)
-
-         if ((x_coord**2 + y_coord**2) <= 1.0_dp) then
-            score = score + 1
-         end if
-      end do
-      estimate = 4.0_dp*score/darts
-
-   end function dboard
-
-end program pi
--- a/lab2/reference.found.data
+++ b/lab2/reference.found.data
-           62
-          183
-          271
-          291
-          296
--- a/lab3/README.md
+++ b/lab3/README.md
-# Overview
-
-In this lab you will become familiar with more advanced MPI topics, including one-sided communication and MPI I/O.
-
-### Goals
-
-Gain experience with MPI one-sided communication, MPI I/O, and MPI topologies.
-
-### Duration
-
-Three hours
-
-# Source Codes
-
-* MPI I/O. Serial hello world in C and Fortran ([hello_mpi.c](hello_mpi.c) and [hello_mpi.f90](hello_mpi.f90))
-* MPI Derived types and I/O. Serial STL file reader in C and Fortran ([mpi_derived_types.c](mpi_derived_types.c) and [mpi_derived_types.f90](mpi_derived_types.f90))
-* MPI Latency: C and Fortran ([mpi_latency.c](mpi_latency.c) and [mpi_latency.f90](mpi_latency.f90))
-* MPI Bandwidth: C and Fortran ([mpi_bandwidth.c](mpi_bandwidth.c) and [mpi_bandwidth.f90](mpi_bandwidth.f90))
-* MPI Bandwidth Non-Blocking: C and Fortran ([mpi_bandwidth-nonblock.c](mpi_bandwidth-nonblock.c)
-  and [mpi_bandwidth-nonblock.f90](mpi_bandwidth-nonblock.f90))
- 
-
-# Preparation
-
-In preparation for this lab, read the [general instructions](../README.md) which will help you get going on Beskow.
-
-# Exercise 1 - MPI I/O
-
-MPI I/O is used so that results can be written to the same file in parallel. Take the serial hello world programs and modify them so that, instead of being printed to the screen, the output is written to a file using MPI I/O.
-
-The simplest solution is likely to be for you to create a character buffer, and then use the MPI_File_write_at function.
-
-# Exercise 2 - MPI I/O and derived types
-
-Take the serial stl reader and modify it such that the stl file is read (and written) in parallel using collective MPI I/O. Use derived types such that the file can be read/written with a maximum of 3 I/O operations per read and write.
-
-The simplest solution is likely to be to create a derived type for each triangle, and then use the MPI_File_XXXX_at_all function. A correct solution will have the same MD5 hash for both STL models (input and output), unless the order of the triangles has been changed.
-
-```
-md5sum out.stl data/sphere.stl
-822aba6dc20cc0421f92ad50df95464c  out.stl
-822aba6dc20cc0421f92ad50df95464c  data/sphere.stl
-```
-
-# Exercise 3 - Bandwidth and latency between nodes
-
-Use `mpi_wtime` to compute the latency and bandwidth with the latency and bandwidth codes listed above.
-
-**Note**: In modifying the original exercises provided by LLNL, we had to make a small change to the latency code, as the Cray latency is a lot better than the tests were designed for. When the latency is of the order of 1 millisecond, writing it out as an integer number of milliseconds did not make much sense.
-
-For this exercise, it is instructive to compare running on the same node, e.g.
-
-```
-salloc -N 1 --ntasks-per-node=2 -A <project> -t 00:05:00
-srun -n 2 ./mpi_latency.x
-```
-
-with running on separate nodes
-
-```
-salloc -N 2 --ntasks-per-node=1 -A <project> -t 00:05:00
-srun -n 2 ./mpi_latency.x
-```
-
-Similarly for the bandwidth.
-
-As you would expect, the latency is much better on a single node than across nodes. Perhaps unexpectedly, if you have just 2 MPI tasks, the bandwidth is better between nodes than within a single node (probably related to the lack of contention for resources, e.g. the Gemini chips and the L3 cache).
-
-# Solutions
-
-The solutions will be made available at the end of the lab.
-
-# Acknowledgment
-
-The examples in this lab are provided for educational purposes by 
-[National Center for Supercomputing Applications](http://www.ncsa.illinois.edu/), 
-(in particular their [Cyberinfrastructure Tutor](http://www.citutor.org/)), 
-[Lawrence Livermore National Laboratory](https://computing.llnl.gov/) and 
-[Argonne National Laboratory](http://www.mcs.anl.gov/). Much of the LLNL MPI material comes from the 
-[Cornell Theory Center](http://www.cac.cornell.edu/). 
-We would like to thank them for allowing us to develop the material for machines at PDC. 
-You might find other useful educational materials at these sites.
-
-