From bc196472854cc4c30f5babbe39d86e57e72a961e Mon Sep 17 00:00:00 2001 From: janEbert <janpublicebert@posteo.net> Date: Tue, 9 Jul 2024 11:24:27 +0200 Subject: [PATCH] Handle more errors Also some refactoring. --- pytorch-ddp-example/activate.sh | 17 ++++++++++++---- pytorch-ddp-example/set_up.sh | 33 +++++++++++++++++++++++++++----- pytorch-fsdp-example/activate.sh | 17 ++++++++++++---- pytorch-fsdp-example/set_up.sh | 33 +++++++++++++++++++++++++++----- 4 files changed, 82 insertions(+), 18 deletions(-) diff --git a/pytorch-ddp-example/activate.sh b/pytorch-ddp-example/activate.sh index 4083f0a..517544b 100644 --- a/pytorch-ddp-example/activate.sh +++ b/pytorch-ddp-example/activate.sh @@ -5,17 +5,26 @@ if [ -z "$curr_dir" ]; then curr_dir="$(dirname "$curr_file")" fi -venv_dir="$curr_dir"/env +venv_dir="$curr_dir"/env-"$SYSTEMNAME" [ -x "$(command -v deactivate)" ] && deactivate module --force purge +if ! [ -f "$curr_dir"/modules.sh ]; then + echo "Cannot find \`$curr_dir/modules.sh\`; its existence is required." + exit 1 +fi source "$curr_dir"/modules.sh if ! [ -d "$venv_dir" ]; then - echo "Cannot set up \`venv\` on JUWELS Booster compute node." \ - "Please manually execute \`bash set_up.sh\` on a login node." + echo "Please manually execute \`bash ${curr_dir@Q}/set_up.sh\`" \ + "on a login node to create the \`venv\`." + exit 1 +elif ! [ -f "$venv_dir"/bin/activate ]; then + echo "Something seems to be wrong with the \`venv\` at \`$venv_dir\`." \ + "Please delete it (\`rm -rf ${venv_dir@Q}\`) and" \ + "execute \`bash ${curr_dir@Q}/set_up.sh\`" \ + "on a login node to re-create the \`venv\`." exit 1 fi - source "$venv_dir"/bin/activate diff --git a/pytorch-ddp-example/set_up.sh b/pytorch-ddp-example/set_up.sh index 95d837a..9c86a5a 100644 --- a/pytorch-ddp-example/set_up.sh +++ b/pytorch-ddp-example/set_up.sh @@ -3,16 +3,39 @@ curr_file="${BASH_SOURCE[0]:-${(%):-%x}}" curr_dir="$(dirname "$curr_file")" -if ! [ -d "$curr_dir"/env ]; then +venv_dir="$curr_dir"/env-"$SYSTEMNAME" + +if ! [ -d "$venv_dir" ]; then [ -x "$(command -v deactivate)" ] && deactivate + module --force purge + if ! [ -f "$curr_dir"/modules.sh ]; then + echo "Cannot find \`$curr_dir/modules.sh\`; its existence is required." + exit 1 + fi source "$curr_dir"/modules.sh - python3 -m venv --system-site-packages "$curr_dir"/env - source "$curr_dir"/env/bin/activate + + python3 -m venv --system-site-packages "$venv_dir" + + if ! [ -f "$venv_dir"/bin/activate ]; then + echo "Something seems to be wrong with the \`venv\` at \`$venv_dir\`." \ + "Please delete it (\`rm -rf ${venv_dir@Q}\`) and" \ + "execute \`bash ${curr_dir@Q}/set_up.sh\`" \ + "on a login node to re-create the \`venv\`." + exit 1 + fi + source "$venv_dir"/bin/activate + python -m pip install -U pip + if ! [ -f "$curr_dir"/requirements.txt ]; then + echo "Cannot find \`$curr_dir/requirements.txt\`;" \ + "its existence is required." + exit 1 + fi python -m pip install -r "$curr_dir"/requirements.txt + deactivate else - echo "\`venv\` is already set up at \`$curr_dir/env\`." \ - "Please delete it to force a re-generation." + echo "\`venv\` is already set up at \`$venv_dir\`. Please" \ + "delete it (\`rm -rf ${venv_dir@Q}\`) to force a re-generation." fi diff --git a/pytorch-fsdp-example/activate.sh b/pytorch-fsdp-example/activate.sh index 4083f0a..517544b 100644 --- a/pytorch-fsdp-example/activate.sh +++ b/pytorch-fsdp-example/activate.sh @@ -5,17 +5,26 @@ if [ -z "$curr_dir" ]; then curr_dir="$(dirname "$curr_file")" fi -venv_dir="$curr_dir"/env +venv_dir="$curr_dir"/env-"$SYSTEMNAME" [ -x "$(command -v deactivate)" ] && deactivate module --force purge +if ! [ -f "$curr_dir"/modules.sh ]; then + echo "Cannot find \`$curr_dir/modules.sh\`; its existence is required." + exit 1 +fi source "$curr_dir"/modules.sh if ! [ -d "$venv_dir" ]; then - echo "Cannot set up \`venv\` on JUWELS Booster compute node." \ - "Please manually execute \`bash set_up.sh\` on a login node." + echo "Please manually execute \`bash ${curr_dir@Q}/set_up.sh\`" \ + "on a login node to create the \`venv\`." + exit 1 +elif ! [ -f "$venv_dir"/bin/activate ]; then + echo "Something seems to be wrong with the \`venv\` at \`$venv_dir\`." \ + "Please delete it (\`rm -rf ${venv_dir@Q}\`) and" \ + "execute \`bash ${curr_dir@Q}/set_up.sh\`" \ + "on a login node to re-create the \`venv\`." exit 1 fi - source "$venv_dir"/bin/activate diff --git a/pytorch-fsdp-example/set_up.sh b/pytorch-fsdp-example/set_up.sh index 95d837a..9c86a5a 100644 --- a/pytorch-fsdp-example/set_up.sh +++ b/pytorch-fsdp-example/set_up.sh @@ -3,16 +3,39 @@ curr_file="${BASH_SOURCE[0]:-${(%):-%x}}" curr_dir="$(dirname "$curr_file")" -if ! [ -d "$curr_dir"/env ]; then +venv_dir="$curr_dir"/env-"$SYSTEMNAME" + +if ! [ -d "$venv_dir" ]; then [ -x "$(command -v deactivate)" ] && deactivate + module --force purge + if ! [ -f "$curr_dir"/modules.sh ]; then + echo "Cannot find \`$curr_dir/modules.sh\`; its existence is required." + exit 1 + fi source "$curr_dir"/modules.sh - python3 -m venv --system-site-packages "$curr_dir"/env - source "$curr_dir"/env/bin/activate + + python3 -m venv --system-site-packages "$venv_dir" + + if ! [ -f "$venv_dir"/bin/activate ]; then + echo "Something seems to be wrong with the \`venv\` at \`$venv_dir\`." \ + "Please delete it (\`rm -rf ${venv_dir@Q}\`) and" \ + "execute \`bash ${curr_dir@Q}/set_up.sh\`" \ + "on a login node to re-create the \`venv\`." + exit 1 + fi + source "$venv_dir"/bin/activate + python -m pip install -U pip + if ! [ -f "$curr_dir"/requirements.txt ]; then + echo "Cannot find \`$curr_dir/requirements.txt\`;" \ + "its existence is required." + exit 1 + fi python -m pip install -r "$curr_dir"/requirements.txt + deactivate else - echo "\`venv\` is already set up at \`$curr_dir/env\`." \ - "Please delete it to force a re-generation." + echo "\`venv\` is already set up at \`$venv_dir\`. Please" \ + "delete it (\`rm -rf ${venv_dir@Q}\`) to force a re-generation." fi -- GitLab