Skip to content
Snippets Groups Projects
Commit 3fa796d6 authored by Chelsea Maria John's avatar Chelsea Maria John
Browse files

initial commit

parents
No related branches found
No related tags found
1 merge request!1Master
OpenCatalystOutput/logs/*
hpc/open_catalyst/results/*
hpc/open_catalyst/logs/*
hpc/*
venv/*
\ No newline at end of file
{"config": {"trainer": "mlperf_forces", "dataset": [{"src": "/p/scratch/hai_mlperf/oc20_data/s2ef/2M/train", "normalize_labels": true, "target_mean": -0.7554450631141663, "target_std": 2.887317180633545, "grad_target_mean": 0.0, "grad_target_std": 2.887317180633545}, {"src": "/p/scratch/hai_mlperf/oc20_data/s2ef/all/val_id"}], "task": {"mlperf_benchmark": "oc20", "mlperf_org": "LBNL", "mlperf_division": "closed", "mlperf_status": "onprem", "mlperf_platform": "SUBMISSION_PLATFORM_PLACEHOLDER", "mlperf_accelerators_per_node": 4, "mlperf_accelerators_per_rank": 1, "dataset": "trajectory_lmdb", "description": "Regressing to energies and forces for DFT trajectories from OCP", "type": "regression", "metric": "mae", "primary_metric": "forces_mae", "target_forces_mae": 0.036, "labels": ["potential energy"], "grad_input": "atomic forces", "train_on_free_atoms": true, "eval_on_free_atoms": true}, "model": {"name": "dimenetplusplus", "hidden_channels": 192, "out_emb_channels": 192, "num_blocks": 3, "cutoff": 6.0, "num_radial": 6, "num_spherical": 7, "num_before_skip": 1, "num_after_skip": 2, "num_output_layers": 3, "regress_forces": true, "use_pbc": true}, "optim": {"batch_size": 4, "eval_batch_size": 4, "num_workers": 1, "optimizer": "Adam", "lr_initial": 0.0004, "warmup_steps": 31252, "warmup_factor": 0.2, "lr_milestones": [125008, 187512, 250016], "lr_gamma": 0.1, "max_epochs": 30, "energy_coefficient": 0, "force_coefficient": 50, "disable_tqdm": true}, "slurm": {"job_name": "oc20_s2ef", "account": "hai_mlperf", "partition": "booster", "tasks_per_node": 4, "gres": "gpu:4", "time": "00:10:00"}, "mode": "train", "identifier": "", "seed": 0, "is_debug": false, "run_dir": "./", "is_vis": false, "print_every": 10, "amp": false, "nonddp": false, "checkpoint": null, "cpu": false, "submit": true, "local_rank": 0, "distributed_port": 13356, "world_size": 8, "distributed_backend": "nccl"}, "slurm_id": "5612744", "timestamp": "12:04:36PM CEST Jun 02, 2022"}
This diff is collapsed.
{"config": {"trainer": "mlperf_forces", "dataset": [{"src": "/p/scratch/hai_mlperf/oc20_data/s2ef/2M/train", "normalize_labels": true, "target_mean": -0.7554450631141663, "target_std": 2.887317180633545, "grad_target_mean": 0.0, "grad_target_std": 2.887317180633545}, {"src": "/p/scratch/hai_mlperf/oc20_data/s2ef/all/val_id"}], "task": {"mlperf_benchmark": "oc20", "mlperf_org": "LBNL", "mlperf_division": "closed", "mlperf_status": "onprem", "mlperf_platform": "SUBMISSION_PLATFORM_PLACEHOLDER", "mlperf_accelerators_per_node": 4, "mlperf_accelerators_per_rank": 1, "dataset": "trajectory_lmdb", "description": "Regressing to energies and forces for DFT trajectories from OCP", "type": "regression", "metric": "mae", "primary_metric": "forces_mae", "target_forces_mae": 0.036, "labels": ["potential energy"], "grad_input": "atomic forces", "train_on_free_atoms": true, "eval_on_free_atoms": true}, "model": {"name": "dimenetplusplus", "hidden_channels": 192, "out_emb_channels": 192, "num_blocks": 3, "cutoff": 6.0, "num_radial": 6, "num_spherical": 7, "num_before_skip": 1, "num_after_skip": 2, "num_output_layers": 3, "regress_forces": true, "use_pbc": true}, "optim": {"batch_size": 4, "eval_batch_size": 4, "num_workers": 1, "optimizer": "Adam", "lr_initial": 0.0004, "warmup_steps": 31252, "warmup_factor": 0.2, "lr_milestones": [125008, 187512, 250016], "lr_gamma": 0.1, "max_epochs": 30, "energy_coefficient": 0, "force_coefficient": 50, "disable_tqdm": true}, "slurm": {"job_name": "oc20_s2ef", "account": "hai_mlperf", "partition": "booster", "tasks_per_node": 4, "gres": "gpu:4", "time": "4:00:00"}, "mode": "train", "identifier": "", "seed": 0, "is_debug": false, "run_dir": "./", "is_vis": false, "print_every": 10, "amp": false, "nonddp": false, "checkpoint": null, "cpu": false, "submit": true, "local_rank": 0, "distributed_port": 13356, "world_size": 8, "distributed_backend": "nccl"}, "slurm_id": "5612879", "timestamp": "12:35:39PM CEST Jun 02, 2022"}
This diff is collapsed.
{"config": {"trainer": "mlperf_forces", "dataset": [{"src": "/p/scratch/hai_mlperf/oc20_data/s2ef/2M/train", "normalize_labels": true, "target_mean": -0.7554450631141663, "target_std": 2.887317180633545, "grad_target_mean": 0.0, "grad_target_std": 2.887317180633545}, {"src": "/p/scratch/hai_mlperf/oc20_data/s2ef/all/val_id"}], "task": {"mlperf_benchmark": "oc20", "mlperf_org": "LBNL", "mlperf_division": "closed", "mlperf_status": "onprem", "mlperf_platform": "SUBMISSION_PLATFORM_PLACEHOLDER", "mlperf_accelerators_per_node": 4, "mlperf_accelerators_per_rank": 1, "dataset": "trajectory_lmdb", "description": "Regressing to energies and forces for DFT trajectories from OCP", "type": "regression", "metric": "mae", "primary_metric": "forces_mae", "target_forces_mae": 0.036, "labels": ["potential energy"], "grad_input": "atomic forces", "train_on_free_atoms": true, "eval_on_free_atoms": true}, "model": {"name": "dimenetplusplus", "hidden_channels": 192, "out_emb_channels": 192, "num_blocks": 3, "cutoff": 6.0, "num_radial": 6, "num_spherical": 7, "num_before_skip": 1, "num_after_skip": 2, "num_output_layers": 3, "regress_forces": true, "use_pbc": true}, "optim": {"batch_size": 8, "eval_batch_size": 8, "num_workers": 1, "optimizer": "Adam", "lr_initial": 0.0004, "warmup_steps": 31252, "warmup_factor": 0.2, "lr_milestones": [125008, 187512, 250016], "lr_gamma": 0.1, "max_epochs": 30, "energy_coefficient": 0, "force_coefficient": 50, "disable_tqdm": true}, "slurm": {"job_name": "oc20_s2ef", "account": "hai_mlperf", "partition": "booster", "tasks_per_node": 4, "gres": "gpu:4", "time": "8:00:00"}, "mode": "train", "identifier": "", "seed": 0, "is_debug": false, "run_dir": "./", "is_vis": false, "print_every": 10, "amp": false, "nonddp": false, "checkpoint": null, "cpu": false, "submit": true, "local_rank": 0, "distributed_port": 13356, "world_size": 8, "distributed_backend": "nccl"}, "slurm_id": "5635558", "timestamp": "09:13:15AM CEST Jun 08, 2022"}
This diff is collapsed.
#!/bin/bash
# Activates the project's Python virtual environment in the calling shell.
#
# Usage: source activate.sh
#
# Sources config.sh (expected to define ENV_DIR) and modules.sh from the
# directory containing this file, prepends the venv's site-packages to
# PYTHONPATH and finally sources the venv's own activate script.
#
# See https://stackoverflow.com/a/28336473
SOURCE_PATH="${BASH_SOURCE[0]:-${(%):-%x}}"
RELATIVE_PATH="$(dirname "$SOURCE_PATH")"
ABSOLUTE_PATH="$(realpath "${RELATIVE_PATH}")"

# Detect whether this file is being sourced. In bash, "$0" differs from the
# sourced file's path. zsh by default sets "$0" to the sourced file itself, so
# there the evaluation context is inspected instead.
ACTIVATE_IS_SOURCED=0
if [[ -n "${ZSH_VERSION:-}" ]]; then
  case "${ZSH_EVAL_CONTEXT:-}" in
    *:file) ACTIVATE_IS_SOURCED=1 ;;
  esac
elif [[ "$0" != "${SOURCE_PATH}" ]]; then
  ACTIVATE_IS_SOURCED=1
fi

if [[ "${ACTIVATE_IS_SOURCED}" != 1 ]]; then
  unset ACTIVATE_IS_SOURCED
  echo "The activation script must be sourced, otherwise the virtual environment will not work." >&2
  # `return` only succeeds if we are sourced after all; otherwise exit for real
  # (the previous `( ... && exit 1 )` only left a subshell and never aborted).
  return 1 2>/dev/null || exit 1
fi
unset ACTIVATE_IS_SOURCED

source "${ABSOLUTE_PATH}"/config.sh
source "${ABSOLUTE_PATH}"/modules.sh

# Only append the previous PYTHONPATH (and its separator) when it is non-empty,
# so that no empty trailing entry ends up in the search path.
export PYTHONPATH="$(echo "${ENV_DIR}"/lib/python*/site-packages)${PYTHONPATH:+:${PYTHONPATH}}"

source "${ENV_DIR}"/bin/activate
# Shared configuration for the virtual-environment helper scripts.
# Must be sourced; exports ENV_NAME and ENV_DIR.
SOURCE_PATH="${BASH_SOURCE[0]:-${(%):-%x}}"

## Check if this script is sourced
# In bash, "$0" differs from the sourced file's path. zsh by default sets "$0"
# to the sourced file itself, so there the evaluation context is used instead.
CONFIG_IS_SOURCED=0
if [[ -n "${ZSH_VERSION:-}" ]]; then
  case "${ZSH_EVAL_CONTEXT:-}" in
    *:file) CONFIG_IS_SOURCED=1 ;;
  esac
elif [[ "$0" != "${SOURCE_PATH}" ]]; then
  CONFIG_IS_SOURCED=1
fi

if [[ "${CONFIG_IS_SOURCED}" != 1 ]]; then
  unset CONFIG_IS_SOURCED
  echo "Vars script must be sourced." >&2
  # `return` only succeeds if we are sourced after all; otherwise exit for real
  # (the previous `( ... && exit 1 )` only left a subshell and never aborted).
  return 1 2>/dev/null || exit 1
fi
unset CONFIG_IS_SOURCED
echo "Setting vars"

## Determine location of this file
RELATIVE_PATH="$(dirname "$SOURCE_PATH")"
ABSOLUTE_PATH="$(realpath "${RELATIVE_PATH}")"

####################################
### User Configuration
# Default name of the venv is the directory that contains this file.
export ENV_NAME="$(basename "$ABSOLUTE_PATH")"
# Default location of this venv is "./venv" next to this file.
export ENV_DIR="${ABSOLUTE_PATH}"/venv
#!/bin/bash
# Registers the project's virtual environment as a Jupyter kernel.
#
# Writes a launcher script to ${ENV_DIR}/kernel.sh and a kernel spec to
# ~/.local/share/jupyter/kernels/${ENV_NAME}/kernel.json. ENV_NAME and ENV_DIR
# are expected to be provided by config.sh located next to this file.
SOURCE_PATH="${BASH_SOURCE[0]:-${(%):-%x}}"
RELATIVE_PATH="$(dirname "$SOURCE_PATH")"
ABSOLUTE_PATH="$(realpath "${RELATIVE_PATH}")"

#######################################
# Create the kernel launcher script and the Jupyter kernel spec.
# Globals:   ABSOLUTE_PATH (read), HOME (read), KERNELFILE (written),
#            ENV_NAME / ENV_DIR (read after sourcing config.sh)
# Arguments: none
# Outputs:   progress messages on stdout, errors on stderr
# Returns:   0 on success, non-zero as soon as any step fails, so that no
#            kernel spec is registered for a launcher that was never written
#######################################
create_kernel() {
  source "${ABSOLUTE_PATH}"/config.sh || return 1

  if [[ -z "${ENV_NAME:-}" ]]; then
    echo "ENV_NAME is not set after sourcing config.sh" >&2
    return 1
  fi
  if [[ ! -d "${ENV_DIR:-}" ]]; then
    echo "Virtual environment directory not found: ${ENV_DIR:-}" >&2
    return 1
  fi

  KERNELFILE="${ENV_DIR}"/kernel.sh
  local kernel_dir="${HOME}/.local/share/jupyter/kernels/${ENV_NAME}"

  echo the name is "$ENV_NAME"
  echo "Setting up the kernel script in the following dir: " "${KERNELFILE}"

  # Launcher: activate the environment, then hand over to ipykernel.
  # "$@" is written literally so the launcher forwards its own arguments.
  printf '%s\n' \
    '#!/bin/bash' \
    "source \"${ABSOLUTE_PATH}\"/activate.sh" \
    'exec python -m ipykernel "$@"' > "${KERNELFILE}" || return 1
  chmod a+x "${KERNELFILE}" || return 1

  mkdir -p "${kernel_dir}" || return 1
  printf '%s\n' \
    '{' \
    '"argv": [' \
    "\"${KERNELFILE}\"," \
    '"-f",' \
    '"{connection_file}"' \
    '],' \
    "\"display_name\": \"${ENV_NAME}\"," \
    '"language": "python"' \
    '}' > "${kernel_dir}/kernel.json"
}

create_kernel "$@"
# Start from a clean module environment before loading anything.
module purge

# Load the software stack through `ml` (presumably Lmod's shorthand for
# `module load` -- confirm on the target system). The order is kept as-is:
# stage and toolchain first, then the Python/ML packages.
ml Stages/2022 GCC OpenMPI \
  TensorFlow Horovod mpi4py PyTorch scikit-learn Jupyter CUDA OpenCV \
  matplotlib PyTorch-Geometric ASE numba Pandoc tensorboardX PyYAML \
  Pillow-SIMD JupyterKernel-PyDeepLearning tqdm
demjson
wandb
submitit
pymatgen
ray[tune]
pre-commit
lmdb==1.1.1
git+https://github.com/mlperf/logging.git
git+https://github.com/rusty1s/pytorch_geometric.git@4ea63d3
setup.sh 0 → 100644
#!/bin/bash
# Creates the project's virtual environment and installs requirements.txt
# into it. config.sh, modules.sh, activate.sh and requirements.txt are expected
# next to this file.
SOURCE_PATH="${BASH_SOURCE[0]:-${(%):-%x}}"
RELATIVE_PATH="$(dirname "$SOURCE_PATH")"
ABSOLUTE_PATH="$(realpath "${RELATIVE_PATH}")"

#######################################
# Create the venv, activate it and install the Python requirements.
# Globals:   ABSOLUTE_PATH (read), ENV_NAME / ENV_DIR (read after sourcing
#            config.sh), VIRTUAL_ENV (read after activation)
# Arguments: none
# Outputs:   tool output on stdout, errors on stderr
# Returns:   0 on success; non-zero if configuration, venv creation,
#            activation or the pip install fails. Stopping early prevents pip
#            from installing into whatever `python` happens to be on PATH.
#######################################
setup_venv() {
  source "${ABSOLUTE_PATH}"/config.sh || return 1
  source "${ABSOLUTE_PATH}"/modules.sh

  python -m venv --prompt "$ENV_NAME" --system-site-packages "${ENV_DIR}" || return 1

  source "${ABSOLUTE_PATH}"/activate.sh
  # The venv's activate script is expected to export VIRTUAL_ENV; -ef compares
  # the actual directories, so differing spellings of the same path still match.
  if [[ ! "${VIRTUAL_ENV:-}" -ef "${ENV_DIR}" ]]; then
    echo "Failed to activate the virtual environment in ${ENV_DIR}" >&2
    return 1
  fi

  python -m pip install -r "${ABSOLUTE_PATH}"/requirements.txt
}

setup_venv "$@"
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment