diff --git a/docs/budgeting.md b/docs/budgeting.md
index 217505c4b3f2348d3e1fd30ddc43e110b8332fe8..8361ad2b37c49f271dc595f9cf64136136efaf81 100644
--- a/docs/budgeting.md
+++ b/docs/budgeting.md
@@ -31,7 +31,7 @@ The compute time used for one job will be accounted by the following formula:
 
 Jobs that run on nodes equipped with GPUs are charged in the same way. Independent of the usage of the GPUs the available cores on the host CPU node are taken into account.
 
-Detailed information of each job can be found in KontView which is accessible via the button 'show extended statistics' for each project in [JuDoor](https://judoor.fz-juelich.de/projects/trainingFIXME/).
+Detailed information of each job can be found in KontView which is accessible via the button 'show extended statistics' for each project in [JuDoor](https://judoor.fz-juelich.de/projects/training2510/).
 Alternatively, you can execute the following command on the login nodes to query your CPU quota usage: `jutil user cpuquota`.
 
 Further information can be found in the "Accounting" chapter of the corresponding [System Documentation](./useful-links.md#system-documentation).
diff --git a/docs/environment.md b/docs/environment.md
index 5a005c7d5d27e7643093a7f9e84fc7547263d1d3..4f2a6abff9bdcae6a966449335e0b761996ac98f 100644
--- a/docs/environment.md
+++ b/docs/environment.md
@@ -30,10 +30,10 @@ For brevity's sake, one can also make one of the projects the "active project" a
 This can also be done through the `jutil` command:
 
 ```
-$ jutil env activate -p trainingFIXME -A trainingFIXME
+$ jutil env activate -p training2510 -A training2510
 ```
 
-Now `trainingFIXME` is the active project.
+Now `training2510` is the active project.
 Any computational jobs will be accounted against its budget and the special file system locations associated with it can be reached through certain environment variables.
 More about that in the next section.
 
@@ -58,22 +58,22 @@ At least two directories are created for each project:
 
 Data projects have access to other storage locations, e.g. the tape based `ARCHIVE` for long term storage of results.
 
-The path of these directories is available as the value of environment variables of the form `<directory>_<project>`, e.g. `PROJECT_trainingFIXME` or `SCRATCH_trainingFIXME`.
+The path of these directories is available as the value of environment variables of the form `<directory>_<project>`, e.g. `PROJECT_training2510` or `SCRATCH_training2510`.
 If you have activated a project in the previous section, you will also have environment variables that are just `PROJECT` and `SCRATCH` that point to the respective directories of the active project.
 
-Print the contents of `PROJECT_trainingFIXME` and `PROJECT`:
+Print the contents of `PROJECT_training2510` and `PROJECT`:
 
 ```
-$ printenv PROJECT_trainingFIXME
-/p/project1/trainingFIXME
+$ printenv PROJECT_training2510
+/p/project1/training2510
 $ printenv PROJECT
-/p/project1/trainingFIXME
+/p/project1/training2510
 ```
 
 Change into that directory and see what is already there:
 
 ```
-$ cd $PROJECT_trainingFIXME
+$ cd $PROJECT_training2510
 $ ls
 ```
 
diff --git a/docs/running-jobs.md b/docs/running-jobs.md
index b243778a4707d9e63123cf2c35f7e7e9269ed163..6e8ba9e777bd6d55a90eb04f64e0ffc4fa4a3cd9 100644
--- a/docs/running-jobs.md
+++ b/docs/running-jobs.md
@@ -47,7 +47,7 @@ Do not forget to replace `YYYYMMDD`, where `YYYY` and `MM` and `DD` are the curr
 ```
 $ hostname
 jrlogin09.jureca
-$ srun -A trainingFIXME --reservation hands-on-YYYYMMDD hostname
+$ srun -A training2510 --reservation hands-on-YYYYMMDD hostname
 srun: job 3472578 queued and waiting for resources
 srun: job 3472578 has been allocated resources
 jrc0454
@@ -61,7 +61,7 @@ To submit to JUWELS Cluster, you want to be logged in to the Cluster login nodes
 ```
 $ hostname
 jwlogin02.juwels
-$ srun -A trainingFIXME --reservation hands-on-cluster-YYYYMMDD hostname
+$ srun -A training2510 --reservation hands-on-cluster-YYYYMMDD hostname
 srun: job 9792359 queued and waiting for resources
 srun: job 9792359 has been allocated resources
 jwc06n213.juwels
@@ -72,7 +72,7 @@ To submit to JUWELS Booster, you want to be logged in to the Booster login nodes
 ```
 $ hostname
 jwlogin24.juwels
-$ srun -A trainingFIXME --reservation hands-on-booster-YYYYMMDD --gres gpu:4 hostname
+$ srun -A training2510 --reservation hands-on-booster-YYYYMMDD --gres gpu:4 hostname
 srun: job 4575092 queued and waiting for resources
 srun: job 4575092 has been allocated resources
 jwb0053.juwels
@@ -88,7 +88,7 @@ $ srun <srun options...> <program> <program options...>
 
 Above we have seen four `srun` options:
 
-- `-A` (short for `--account`) to charge the resources consumed by the computation to the budget allotted to this course (if you have used `jutil env activate -A trainingFIXME` earlier on, you do not need this).
+- `-A` (short for `--account`) to charge the resources consumed by the computation to the budget allotted to this course (if you have used `jutil env activate -A training2510` earlier on, you do not need this).
 
 :::info
 
@@ -100,7 +100,7 @@ The training account budget can be used till the end of the month. After this, o
 
 :::info
 
-For JURECA and JUSUF use the following reservation: `hands-on-YYYYMMDD`. To work on JUWELS Cluster or Booster modules, you have to use `hands-on-cluster-YYYYMMDD` or `hands-on-booster-YYYYMMDD` respectively. Do not forget to replace `YYYYMMDD`, where `YYYY` and `MM` and `DD` are the current year and month and day in the Gregorian calendar, e.g. `20240522`.
+For JURECA and JUSUF use the following reservation: `hands-on-YYYYMMDD`. To work on JUWELS Cluster or Booster modules, you have to use `hands-on-cluster-YYYYMMDD` or `hands-on-booster-YYYYMMDD` respectively. Do not forget to replace `YYYYMMDD`, where `YYYY` and `MM` and `DD` are the current year and month and day in the Gregorian calendar, e.g. `20250513`.
 
 :::
 
@@ -118,7 +118,7 @@ For the `<program>` we used `hostname` with no arguments of its own.
 To run more parallel instances of a program, increase the number of Slurm *tasks* using the `-n` option to `srun`:
 
 ```
-$ srun --label -A trainingFIXME --reservation hands-on-cluster-YYYYMMDD -n 10 hostname
+$ srun --label -A training2510 --reservation hands-on-cluster-YYYYMMDD -n 10 hostname
 srun: job 3472812 queued and waiting for resources
 srun: job 3472812 has been allocated resources
 8: jwc00n002.juwels
@@ -144,7 +144,7 @@ Note also the `--label` option to `srun` which prefixes every line of output by
 Running more tasks than will fit on a single node will allocate two nodes and split the tasks between nodes:
 
 ```
-$ srun --label -A trainingFIXME --reservation hands-on-cluster-YYYYMMDD -n 100 hostname
+$ srun --label -A training2510 --reservation hands-on-cluster-YYYYMMDD -n 100 hostname
 srun: job 3473040 queued and waiting for resources
 srun: job 3473040 has been allocated resources
 0: jwc00n007.juwels
@@ -160,7 +160,7 @@ Running over multiple nodes without intending to is also likely to degrade perfo
 You can now also use `srun` to run the `hellompi` program introduced in the previous section on deploying custom software:
 
 ```
-$ srun -A trainingFIXME --reservation hands-on-cluster-YYYYMMDD -n 5 ./hellompi
+$ srun -A training2510 --reservation hands-on-cluster-YYYYMMDD -n 5 ./hellompi
 srun: job 3471349 queued and waiting for resources
 srun: job 3471349 has been allocated resources
 hello from process 4 of 5
@@ -204,7 +204,7 @@ However, since the number of CPU cores is always rounded up to the next multiple
 Using the `-N` command line argument, you can request a number of nodes from the resource manager (remember to specify `--gres gpu:4` for JUWELS Booster):
 
 ```
-$ salloc -A trainingFIXME --reservation hands-on-cluster-YYYYMMDD -N 1
+$ salloc -A training2510 --reservation hands-on-cluster-YYYYMMDD -N 1
 salloc: Pending job allocation 3475519
 salloc: job 3475519 queued and waiting for resources
 salloc: job 3475519 has been allocated resources
@@ -281,7 +281,7 @@ And enter the following script:
 
 ```sh
 #!/bin/bash
-#SBATCH --account=trainingFIXME
+#SBATCH --account=training2510
 #SBATCH --reservation=hands-on-cluster-YYYYMMDD
 #SBATCH --nodes=2
 #SBATCH --cpus-per-task=1
@@ -360,7 +360,7 @@ By default, Slurm assumes that the processes you create are single threaded and
 Allocate a node for playing around with this mechanism:
 
 ```
-$ salloc -A trainingFIXME --reservation hands-on-cluster-YYYYMMDD -N 1
+$ salloc -A training2510 --reservation hands-on-cluster-YYYYMMDD -N 1
 salloc: Pending job allocation 3499694
 salloc: job 3499694 queued and waiting for resources
 salloc: job 3499694 has been allocated resources
diff --git a/docs/using-gpus.md b/docs/using-gpus.md
index eca7092922022e5a2e345f24af04f93b70344a5b..b5dab5eb6a356d80e7c54fd815ad18c52c3bf374 100644
--- a/docs/using-gpus.md
+++ b/docs/using-gpus.md
@@ -25,9 +25,9 @@ We load the necessary modules, navigate into our individual user directories for
 ```
 $ module load NVHPC ParaStationMPI MPI-settings/CUDA
-$ cd $PROJECT_trainingFIXME/$USER
+$ cd $PROJECT_training2510/$USER
 $ git clone https://github.com/NVIDIA/cuda-samples.git
-$ cd $PROJECT_trainingFIXME/$USER/cuda-samples/Samples/0_Introduction/simpleMPI
+$ cd $PROJECT_training2510/$USER/cuda-samples/Samples/0_Introduction/simpleMPI
 $ make
 /p/software/jurecadc/stages/2024/software/psmpi/5.9.2-1-NVHPC-23.7-CUDA-12/bin/mpicxx -I../../../Common -o simpleMPI_mpi.o -c simpleMPI.cpp
 /p/software/jurecadc/stages/2024/software/CUDA/12/bin/nvcc -ccbin g++ -I../../../Common -m64 --threads 0 --std=c++11
 -Xcompiler -fPIE -gencode arch=compute_50,code=sm_50 -gencode arch=compute_52,code=sm_52 -gencode arch=compute_60,code=sm_60 -gencode arch=compute_61,code=sm_61 -gencode arch=compute_70,code=sm_70 -gencode arch=compute_75,code=sm_75 -gencode arch=compute_80,code=sm_80 -gencode arch=compute_86,code=sm_86 -gencode arch=compute_89,code=sm_89 -gencode arch=compute_90,code=sm_90 -gencode arch=compute_90,code=compute_90 -o simpleMPI.o -c simpleMPI.cu
@@ -40,7 +40,7 @@ There should now be an executable called `simpleMPI` inside the `simpleMPI` dire
 To run the program, use `srun` like before:
 
 ```
-$ srun -A trainingFIXME -p <gpu partition> --gres gpu:4 -N 1 -n 4 ./simpleMPI
+$ srun -A training2510 -p <gpu partition> --gres gpu:4 -N 1 -n 4 ./simpleMPI
 [...]
 Running on 4 nodes
 Average of square roots is: 0.667305
@@ -76,7 +76,7 @@ Afterwards we log out from the compute node with `exit`, put the executed `srun`
 If you want to try this example yourself, remember to change the sgoto command to the appropriate JobID, followed by a 0 (indicating the first, and in this case only, node in the job).
 
 ```
-$ srun -N 1 -n 1 -t 00:10:00 -A trainingFIXME -p develbooster --gres=gpu:4 sleep 600 &
+$ srun -N 1 -n 1 -t 00:10:00 -A training2510 -p develbooster --gres=gpu:4 sleep 600 &
 [1] 25114
 srun: job 5535332 queued and waiting for resources
 srun: job 5535332 has been allocated resources
@@ -117,7 +117,7 @@ Thu May 12 08:49:34 2022
 $ exit
 logout
 $ fg
-srun -N 1 -n 1 -t 00:10:00 -A trainingFIXME -p develbooster --gres=gpu:4 sleep 500
+srun -N 1 -n 1 -t 00:10:00 -A training2510 -p develbooster --gres=gpu:4 sleep 500
 ^Csrun: sending Ctrl-C to StepId=5535332.0
 srun: forcing job termination
 srun: Job step aborted: Waiting up to 6 seconds for job step to finish.
@@ -142,7 +142,7 @@ Let us investigate further on this with a practical example.
 First, we prepare a device query example (remembering to reload the modules from the first example if you are completing this in a different session).
 
 ```
-$ cd $PROJECT_trainingFIXME/$USER/cuda-samples/Samples/1_Utilities/deviceQueryDrv/
+$ cd $PROJECT_training2510/$USER/cuda-samples/Samples/1_Utilities/deviceQueryDrv/
 make
 /p/software/jurecadc/stages/2024/software/CUDA/12/bin/nvcc -ccbin g++ -I../../../Common -m64 --threads 0 --std=c++11 -gencode arch=compute_50,code=compute_50 -o deviceQueryDrv.o -c deviceQueryDrv.cpp
 /p/software/jurecadc/stages/2024/software/CUDA/12/bin/nvcc -ccbin g++ -m64 -gencode arch=compute_50,code=compute_50 -o deviceQueryDrv deviceQueryDrv.o -L/p/software/jurecadc/stages/2024/software/CUDA/12/lib64/stubs -lcuda
@@ -162,7 +162,7 @@ We perform this in this manner, as we wish to get information from multiple comm
 #SBATCH --time=00:01:00
 #SBATCH --partition=develbooster
 #SBATCH --gres=gpu:4
-#SBATCH -A trainingFIXME
+#SBATCH -A training2510
 
 module load CUDA NVHPC ParaStationMPI MPI-settings/CUDA
 
@@ -354,7 +354,7 @@ Use the same modules for compilation which you are planning to use for execution
 ```
 $ module load NVHPC CUDA OpenMPI
 $ mpicxx -O0 -I$CUDA_HOME/include -L$CUDA_HOME/lib64 -lcudart -lcuda mpiBroadcasting.cpp
-$ srun -N 2 -n 8 -t 01:00:00 -A trainingFIXME -p booster --gres=gpu:4 ./a.out
+$ srun -N 2 -n 8 -t 01:00:00 -A training2510 -p booster --gres=gpu:4 ./a.out
 Broadcasting to all host memories took 4.526835 seconds.
 Broadcasting to all GPUs took 7.481972 seconds with intermediate copy to host memory.
 Broadcasting to all GPUs took 2.625439 seconds.
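The docs/using-gpus.md hunks above touch the account, partition, `--gres` request and module list in several separate places. As a hedged summary, a batch-script variant of the interactive `simpleMPI` run could look roughly like the sketch below; the account, partition, GPU request and module set are taken from the hunks above, while the node and task counts are illustrative assumptions.

```sh
#!/bin/bash
# Illustrative sketch: batch-mode variant of the interactive simpleMPI example above.
# Account, partition, GPU request and modules mirror the updated docs; the node and
# task counts are assumptions and should be adapted to the system being used.
#SBATCH --account=training2510
#SBATCH --partition=develbooster
#SBATCH --nodes=1
#SBATCH --ntasks=4
#SBATCH --gres=gpu:4
#SBATCH --time=00:10:00

module load NVHPC ParaStationMPI MPI-settings/CUDA

# One MPI rank per GPU, matching the `srun -n 4 --gres gpu:4` example above.
srun ./simpleMPI
```

Submitting this with `sbatch` should behave like the interactive `srun` call, just without blocking the terminal while the job waits in the queue.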
diff --git a/docusaurus.config.ts b/docusaurus.config.ts
index d8cae183f16a1a74a0aecc68635eb305cdafe012..befee0050bb5689faae48b133e7bb7bb99fd7555 100644
--- a/docusaurus.config.ts
+++ b/docusaurus.config.ts
@@ -107,15 +107,15 @@ const config: Config = {
           items: [
             {
               label: 'Day 1',
-              href: 'https://indico3-jsc.fz-juelich.de/event/149/surveys/78',
+              href: 'https://indico3-jsc.fz-juelich.de/event/226/surveys/135',
             },
             {
               label: 'Day 2',
-              href: 'https://indico3-jsc.fz-juelich.de/event/149/surveys/79',
+              href: 'https://indico3-jsc.fz-juelich.de/event/226/surveys/136',
             },
             {
               label: 'Day 3',
-              href: 'https://indico3-jsc.fz-juelich.de/event/149/surveys/72',
+              href: 'https://indico3-jsc.fz-juelich.de/event/226/surveys/137',
             },
           ],
         },
@@ -124,23 +124,23 @@ const config: Config = {
           items: [
             {
               label: 'Day 1 Talks',
-              href: 'https://indico3-jsc.fz-juelich.de/event/149/surveys/74',
+              href: 'https://indico3-jsc.fz-juelich.de/event/226/surveys/138',
             },
             {
               label: 'Day 2 Talks',
-              href: 'https://indico3-jsc.fz-juelich.de/event/149/surveys/75',
+              href: 'https://indico3-jsc.fz-juelich.de/event/226/surveys/139',
             },
             {
               label: 'Day 3 Talks',
-              href: 'https://indico3-jsc.fz-juelich.de/event/149/surveys/76',
+              href: 'https://indico3-jsc.fz-juelich.de/event/226/surveys/140',
             },
             {
               label: 'Day 4 Talks',
-              href: 'https://indico3-jsc.fz-juelich.de/event/149/surveys/77',
+              href: 'https://indico3-jsc.fz-juelich.de/event/226/surveys/141',
             },
             {
               label: 'Event in total',
-              href: 'https://indico3-jsc.fz-juelich.de/event/149/surveys/73',
+              href: 'https://indico3-jsc.fz-juelich.de/event/226/surveys/142',
             },
           ],
         },
@@ -149,11 +149,11 @@ const config: Config = {
           items: [
             {
               label: 'Zoom',
-              href: 'https://fz-juelich-de.zoom.us/j/63566578749?pwd=Vm0vSkFKd1FOYjNkV3ltekhWNTBodz09',
+              href: 'https://fz-juelich-de.zoom-x.de/j/64190088739?pwd=bS0O4mnE4TP8FV3TUr3foxYmN7TIDm.1',
             },
             {
               label: 'Slack',
-              href: 'https://join.slack.com/t/introtoscmay2024/shared_invite/zt-2inepuzqd-n9RNyjsNCjZABZJ3x6D_vg',
+              href: 'https://join.slack.com/t/introtoscmay2025/shared_invite/zt-350vzvcmc-ozpHglLTlPUJ1h37pc618Q',
             },
           ],
         },
@@ -178,7 +178,7 @@ const config: Config = {
             },
             {
               label: 'Past course materials',
-              href: 'https://www.fz-juelich.de/en/ias/jsc/news/events/training-courses/2023/supercomputing-2',
+              href: 'https://www.fz-juelich.de/en/ias/jsc/news/events/training-courses/2024/supercomputing-2',
             },
           ],
         },
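Once a change set like this is in place, a quick sanity check of the new project handle on a login node ties the pieces together. The following is a minimal sketch using the `jutil` and Slurm commands referenced in the docs above, with the reservation name left as the placeholder from the text:

```sh
# Sketch: confirm the updated course account and reservations are usable.
jutil env activate -p training2510 -A training2510   # make training2510 the active project
printenv PROJECT                                     # should point at /p/project1/training2510
jutil user cpuquota                                  # remaining compute-time budget
scontrol show reservation                            # look for the hands-on-YYYYMMDD entries
```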