start_spark_cluster.sh
    #!/bin/bash
    
    #SBATCH --partition=develbooster
    #SBATCH --account=atmlaml
    #SBATCH --nodes=1
    #SBATCH --tasks-per-node=9
    #SBATCH --time=01:00:00
    #SBATCH --gres=gpu:1
    #SBATCH --job-name=spark-cluster
     
    module load Stages/2023 GCC OpenMPI Spark
    
    # Per-job directories for worker state and logs.
    JOB="$SLURM_JOB_NAME-$SLURM_JOB_ID"
    export SPARK_WORKER_DIR="$SLURM_SUBMIT_DIR/$JOB/worker"
    export SPARK_LOG_DIR="$SLURM_SUBMIT_DIR/$JOB/log"
    export SPARK_MASTER_HOST="$(hostname)"
    export SPARK_MASTER_PORT="4124"
    export SPARK_WORKER_CORES=7
    export SPARK_WORKER_INSTANCES=5  # NOTE: multiple worker instances per node are not working yet
    export MASTER_URL="spark://${SPARK_MASTER_HOST}:${SPARK_MASTER_PORT}"
    
    # Create the directories before writing to them, then dump the
    # environment for later debugging.
    mkdir -p "$SPARK_WORKER_DIR" "$SPARK_LOG_DIR"
    export -p > "${SPARK_LOG_DIR}/env.txt"
    
    echo "------------ Starting Spark Server -------------"
    echo "MASTER_URL: $MASTER_URL"
    echo "------------------------------------------------"
    start-master.sh
    
    echo "------------ Starting Spark Workers ------------"
    echo "MASTER_URL: $MASTER_URL"
    echo "------------------------------------------------"
    
    # Alternative ways to start the workers (kept for reference):
    #srun -n 14 spark-class org.apache.spark.deploy.worker.Worker $MASTER_URL
    #srun -n 1 start-worker.sh $MASTER_URL
    start-worker.sh "$MASTER_URL"
    
    # Block until the worker(s) have registered with the master;
    # wait-worker.sh is a helper script (a sketch is given below).
    . wait-worker.sh
    
    echo "----------- How to test with spark shell ------"
    echo "sgoto $SLURM_JOB_ID 0"
    echo "module load Stages/2023  GCC  OpenMPI Spark"
    echo "export MASTER_URL=$MASTER_URL"
    echo "spark-shell --master \$MASTER_URL"
    echo "# Now your shell should connect to the spark master"
    echo
    echo "In the shell you can now execute the following test script"
    echo
    echo ">>>>"
    echo "val NUM_SAMPLES=1000000"
    echo "val count = sc.parallelize(1 to NUM_SAMPLES).filter { _ =>"
    echo "  val x = math.random"
    echo "  val y = math.random"
    echo "  x*x + y*y < 1"
    echo "}.count()"
    echo "println(s\"Pi is roughly \${4.0 * count / NUM_SAMPLES}\")"
    echo "<<<<"
    echo "------------------------------------------------"
    echo
    echo "----------- How to test with pyspark ------"
    echo "sgoto $SLURM_JOB_ID 0"
    echo "module load Stages/2023  GCC  OpenMPI Spark"
    echo "export MASTER_URL=$MASTER_URL"
    echo "python pyspark_pi.py"
    echo "------------------------------------------------"
    echo
    echo "---------- How to kill -------------------------"
    echo "scancel $SLURM_JOB_ID"
    echo "------------------------------------------------"
    
    
    # Keep the job allocation alive until it is cancelled, so the cluster stays up.
    sleep infinity
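
The sourced wait-worker.sh is not part of this file. Below is a minimal sketch of what it could look like, assuming each worker writes its registration message to a log file under $SPARK_LOG_DIR; the expected worker count and the grep pattern are assumptions, not the actual helper shipped with this script.

    #!/bin/bash
    # Hypothetical wait-worker.sh: poll the worker logs until every expected
    # worker reports a successful registration with the master.
    NUM_WORKERS=1   # one start-worker.sh call above -> one worker expected
    
    while true; do
        # Spark workers log "Successfully registered with master ..." on startup.
        registered=$(grep -l "Successfully registered with master" \
            "$SPARK_LOG_DIR"/*Worker*.out 2>/dev/null | wc -l)
        echo "registered workers: $registered / $NUM_WORKERS"
        [ "$registered" -ge "$NUM_WORKERS" ] && break
        sleep 2
    done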
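The pyspark_pi.py referenced in the pyspark instructions is likewise not shown here. A plausible sketch, mirroring the Scala spark-shell example echoed above: the file name and the use of MASTER_URL from the environment come from the script's own instructions, everything else is an assumption.

    import os
    import random
    
    from pyspark.sql import SparkSession
    
    # Connect to the standalone master started by start_spark_cluster.sh.
    spark = (
        SparkSession.builder
        .master(os.environ["MASTER_URL"])  # e.g. spark://<host>:4124
        .appName("pi-estimate")
        .getOrCreate()
    )
    sc = spark.sparkContext
    
    NUM_SAMPLES = 1_000_000
    
    def inside(_):
        # Draw a random point in the unit square; keep it if it falls
        # inside the quarter circle of radius 1.
        x, y = random.random(), random.random()
        return x * x + y * y < 1
    
    count = sc.parallelize(range(NUM_SAMPLES)).filter(inside).count()
    print(f"Pi is roughly {4.0 * count / NUM_SAMPLES}")
    
    spark.stop()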