diff --git a/video_prediction_tools/env_setup/create_env.sh b/video_prediction_tools/env_setup/create_env.sh index 750abfb9a8a95db6c6f115e9ad3c14c81320b109..079007f75b9c42f1139c3f3006f11d4979f51d04 100755 --- a/video_prediction_tools/env_setup/create_env.sh +++ b/video_prediction_tools/env_setup/create_env.sh @@ -2,13 +2,13 @@ # # __authors__ = Bing Gong, Michael Langguth # __date__ = '2020_01_15' -# __last_update__ = '2021_04_28' by Michael Langguth +# __last_update__ = '2021_10_28' by Michael Langguth # # **************** Description **************** # This script can be used for setting up the virtual environment needed for AMBS-project # The name of the virtual environment is controlled by the first parsed argument. # It also setups the (Batch) runscript templates to customized runscripts (to be used by generate_runscript.py) -# Note that the basic output directory for the workflow may be set may passing a path as second argument to this script. +# Note that the basic output directory for the workflow may be set by passing -base_dir [my_dir]. # **************** Description **************** # # **************** Auxiliary functions **************** @@ -54,24 +54,24 @@ ENV_SETUP_DIR=`pwd` WORKING_DIR="$(dirname "$ENV_SETUP_DIR")" EXE_DIR="$(basename "$ENV_SETUP_DIR")" ENV_DIR=${WORKING_DIR}/${ENV_NAME} +TF_CONTAINER=${WORKING_DIR}/tensorflow_21.09-tf1-py3.sif ## perform sanity checks # correct bool_container if host is Juwels Booster and ensure running singularity -if [[ "${bool_container}" == 0 ]] && [[ "${HOST_NAME}" == jwlogin2[1-4]* ]]; then +if [[ "${bool_container}" == 0 ]]; then echo "******************************************** NOTE ********************************************" - echo " Running on Juwels Booster is only possible inside a container environment. " + echo " Set up virtual environment without TF1.15-container. " + echo " Note that training without container using GPUs on the Juelich HPC-systems is not possible! 
" echo "******************************************** NOTE ********************************************" - bool_container=1 fi +modules_purge="" if [[ "${bool_container}" == 1 ]]; then - echo "******************************************** NOTE ********************************************" - echo " Make use of dedicated Horovod-related working branches only!!! " - echo "******************************************** NOTE ********************************************" - # Check if singularity is running - if [[ -z "${SINGULARITY_NAME}" ]]; then - echo "ERROR: create_env.sh must be executed in a running singularity on Juwels in conjuction with container-usage." - echo "Thus, execute 'singularity shell [my_docker_image]' first!" + echo "Virtual environment will be set up in TensorFlow 1.15-container." + modules_purge=purge + # Check that the container image file exists + if [[ ! -f "${TF_CONTAINER}" ]]; then + echo "ERROR: Could not find required TensorFlow 1.15-container under ${TF_CONTAINER}" return fi fi @@ -94,20 +94,12 @@ else fi ## check integratability of modules -if [[ "${HOST_NAME}" == hdfml* || "${HOST_NAME}" == *jwlogin* ]]; then - if [[ "${bool_container}" > 0 ]]; then - # on Juwels Booster, we are in a container environment -> loading modules is not possible - echo "***** Note for container environment! *****" - echo "Already checked the required modules?" - echo "To do so, run 'source modules_train.sh' after exiting the singularity." - echo "***** Note for container environment! *****" - else +if [[ "${HOST_NAME}" == hdfml* || "${HOST_NAME}" == *juwels* || "${HOST_NAME}" == *jwlogin* ]]; then # load modules and check for their availability echo "***** Checking modules required during the workflow... 
*****" source ${ENV_SETUP_DIR}/modules_preprocess.sh purge source ${ENV_SETUP_DIR}/modules_train.sh purge - source ${ENV_SETUP_DIR}/modules_postprocess.sh - fi + source ${ENV_SETUP_DIR}/modules_postprocess.sh ${modules_purge} else echo "ERROR: AMBS-workflow is currently only supported on the Juelich HPC-systems HDF-ML, Juwels and Juwels Booster" return @@ -115,96 +107,71 @@ else # unset PYTHONPATH fi - -# Create fresh virtual environment or just activate the existing one -if [[ "$ENV_EXIST" == 0 ]]; then - # Check modules first - if [[ "${HOST_NAME}" == hdfml* || "${HOST_NAME}" == *juwels* ]]; then - if [[ "${HOST_NAME}" == jwlogin2[1-4]* ]]; then - # on Juwels Booster, we are in a container environment -> loading modules is not possible - echo "***** Note for Juwels Booster! *****" - echo "Already checked the required modules?" - echo "To do so, run 'source modules_train.sh' after exiting the singularity." - echo "***** Note for Juwels Booster! *****" - else - # load modules and check for their availability - echo "***** Checking modules required during the workflow... *****" - source ${ENV_SETUP_DIR}/modules_preprocess.sh purge - source ${ENV_SETUP_DIR}/modules_train.sh purge - source ${ENV_SETUP_DIR}/modules_postprocess.sh - fi - else - # unset PYTHONPATH on every other machine that is not a known HPC-system - unset PYTHONPATH - fi -fi - ## set up virtual environment if [[ "$ENV_EXIST" == 0 ]]; then # Activate virtual environment and install additional Python packages. echo "Configuring and activating virtual environment on ${HOST_NAME}" - - python3 -m venv $ENV_DIR + + if [[ "${bool_container}" == 1 ]]; then + singularity exec --nv "${TF_CONTAINER}" ./install_venv_container.sh "${ENV_DIR}" + else + # create virtual environment here + python3 -m venv $ENV_DIR - activate_virt_env=${ENV_DIR}/bin/activate + activate_virt_env=${ENV_DIR}/bin/activate - echo "Entering virtual environment ${ENV_DIR} to install required Python modules..." 
- source ${activate_virt_env} + echo "Activating virtual environment ${ENV_DIR} to install required Python modules..." + source ${activate_virt_env} - # install some requirements and/or check for modules - if [[ "${HOST_NAME}" == hdfml* || "${HOST_NAME}" == *jwlogin* ]]; then # Install packages depending on host - echo "***** Start installing additional Python modules with pip... *****" - req_file=${ENV_SETUP_DIR}/requirements.txt - if [[ "${bool_container}" > 0 ]]; then req_file=${ENV_SETUP_DIR}/requirements_container.txt; fi - - pip3 install --no-cache-dir -r ${req_file} - else - echo "***** Start installing additional Python modules with pip... *****" - pip3 install --upgrade pip - pip3 install -r ${ENV_SETUP_DIR}/requirements.txt - pip3 install mpi4py - pip3 install netCDF4 - pip3 install numpy - pip3 install h5py - pip3 install tensorflow-gpu==1.13.1 - fi + if [[ "${HOST_NAME}" == hdfml* || "${HOST_NAME}" == *juwels* || "${HOST_NAME}" == *jwlogin* ]]; then + echo "***** Start installing additional Python modules with pip... *****" + req_file=${ENV_SETUP_DIR}/requirements.txt + if [[ "${bool_container}" > 0 ]]; then req_file=${ENV_SETUP_DIR}/requirements_container.txt; fi - # expand PYTHONPATH... 
- export PYTHONPATH=${WORKING_DIR}:$PYTHONPATH >> ${activate_virt_env} - export PYTHONPATH=${WORKING_DIR}/utils:$PYTHONPATH >> ${activate_virt_env} - export PYTHONPATH=${WORKING_DIR}/external_package/lpips-tensorflow:$PYTHONPATH >> ${activate_virt_env} - export PYTHONPATH=${WORKING_DIR}/model_modules:$PYTHONPATH >> ${activate_virt_env} - export PYTHONPATH=${WORKING_DIR}/postprocess:$PYTHONPATH >> ${activate_virt_env} - - if [[ "${HOST_NAME}" == hdfml* || "${HOST_NAME}" == *jwlogin* ]]; then - export PYTHONPATH=${ENV_DIR}/lib/python3.6/site-packages:$PYTHONPATH >> ${activate_virt_env} - if [[ "${bool_container}" > 0 ]]; then - export PYTONPATH=/usr/locali/lib/python3.6/dist-packages:$PYTHONPATH - fi - fi - # ...and ensure that this also done when the - echo "" >> ${activate_virt_env} - echo "# Expand PYTHONPATH..." >> ${activate_virt_env} - echo "export PYTHONPATH=${WORKING_DIR}:\$PYTHONPATH" >> ${activate_virt_env} - echo "export PYTHONPATH=${WORKING_DIR}/utils/:\$PYTHONPATH" >> ${activate_virt_env} - echo "export PYTHONPATH=${WORKING_DIR}/model_modules:$PYTHONPATH " >> ${activate_virt_env} - echo "export PYTHONPATH=${WORKING_DIR}/external_package/lpips-tensorflow:\$PYTHONPATH" >> ${activate_virt_env} - echo "export PYTHONPATH=${WORKING_DIR}/postprocess:$PYTHONPATH" >> ${activate_virt_env} - - if [[ "${HOST_NAME}" == hdfml* || "${HOST_NAME}" == *juwels* ]]; then - echo "export PYTHONPATH=${ENV_DIR}/lib/python3.6/site-packages:\$PYTHONPATH" >> ${activate_virt_env} - if [[ "${bool_container}" > 0 ]]; then - echo "export PYTONPATH=/usr/locali/lib/python3.6/dist-packages:\$PYTHONPATH" >> ${activate_virt_env} - fi + pip3 install --no-cache-dir -r ${req_file} + else + echo "***** Start installing additional Python modules with pip... *****" + req_file=${ENV_SETUP_DIR}/requirements_noHPC.txt + pip3 install --upgrade pip + pip3 install --no-cache-dir -r ${req_file} + fi + + # expand PYTHONPATH... 
+ export PYTHONPATH=${WORKING_DIR}:$PYTHONPATH >> ${activate_virt_env} + export PYTHONPATH=${WORKING_DIR}/utils:$PYTHONPATH >> ${activate_virt_env} + export PYTHONPATH=${WORKING_DIR}/external_package/lpips-tensorflow:$PYTHONPATH >> ${activate_virt_env} + export PYTHONPATH=${WORKING_DIR}/model_modules:$PYTHONPATH >> ${activate_virt_env} + export PYTHONPATH=${WORKING_DIR}/postprocess:$PYTHONPATH >> ${activate_virt_env} + + if [[ "${HOST_NAME}" == hdfml* || "${HOST_NAME}" == *jwlogin* ]]; then + export PYTHONPATH=${ENV_DIR}/lib/python3.6/site-packages:$PYTHONPATH >> ${activate_virt_env} + fi + # ...and ensure that this is also done whenever the virtual environment is activated + echo "" >> "${activate_virt_env}" + # shellcheck disable=SC2129 + echo "# Expand PYTHONPATH..." >> ${activate_virt_env} + echo "export PYTHONPATH=${WORKING_DIR}:\$PYTHONPATH" >> ${activate_virt_env} + echo "export PYTHONPATH=${WORKING_DIR}/utils/:\$PYTHONPATH" >> ${activate_virt_env} + echo "export PYTHONPATH=${WORKING_DIR}/model_modules:\$PYTHONPATH" >> ${activate_virt_env} + echo "export PYTHONPATH=${WORKING_DIR}/external_package/lpips-tensorflow:\$PYTHONPATH" >> ${activate_virt_env} + echo "export PYTHONPATH=${WORKING_DIR}/postprocess:\$PYTHONPATH" >> ${activate_virt_env} + + if [[ "${HOST_NAME}" == hdfml* || "${HOST_NAME}" == *juwels* ]]; then + echo "export PYTHONPATH=${ENV_DIR}/lib/python3.6/site-packages:\$PYTHONPATH" >> ${activate_virt_env} + fi fi info_str="Virtual environment ${ENV_DIR} has been set up successfully." elif [[ "$ENV_EXIST" == 1 ]]; then # loading modules of postprocessing and activating virtual env are suifficient if [[ "${bool_container}" == 0 ]]; then source ${ENV_SETUP_DIR}/modules_postprocess.sh + # activate virtual environment + source ${ENV_DIR}/bin/activate + else + # activate virtual environment with path-adaption + source ${ENV_DIR}/${ENV_NAME}/bin/activate fi - source ${ENV_DIR}/bin/activate info_str="Virtual environment ${ENV_DIR} has been activated successfully." fi