Differences
This shows you the differences between two versions of the page.
Both sides previous revision Previous revision Next revision | Previous revision Next revision Both sides next revision | ||
slurm [2022/10/25 14:01] vodrazka [ÚFAL Grid Engine (LRC)] |
slurm [2023/03/15 17:30] rosa [Batch mode] Rudolfs template |
||
---|---|---|---|
Line 7: | Line 7: | ||
===== Node list by partitions ===== | ===== Node list by partitions ===== | ||
+ | The naming convention is straightforward for CPU nodes: the nodes in each group are simply numbered. For GPU nodes the format is [t]dll-**X**gpu**N**, where **X** is the total number of GPUs installed in the node and **N** enumerates the nodes with that configuration. | ||
+ | The prefix **t** is for nodes at Troja and **dll** stands for Deep Learning Laboratory. | ||
==== cpu-troja ==== | ==== cpu-troja ==== | ||
+ | | Node name | Thread count | Socket: | ||
+ | | achilles[1-8] | 32 | 2:8:2 | 128810 | | ||
+ | | hector[1-8] | 32 | 2:8:2 | 128810 | | ||
+ | | helena[1-8] | 32 | 2:8:2 | 128811 | | ||
+ | | paris[1-8] | 32 | 2:8:2 | 128810 | | ||
+ | | hyperion[2-8] | 64 | 2:16:2 | 257667 | | ||
==== cpu-ms ==== | ==== cpu-ms ==== | ||
+ | | Node name | Thread count | Socket: | ||
+ | | iridium | 16 | 2:4:2 | 515977 | | ||
+ | | orion[1-8] | 40 | 2:10:2 | 128799 | | ||
==== gpu-troja ==== | ==== gpu-troja ==== | ||
+ | | Node name | Thread count | Socket: | ||
+ | | tdll-3gpu[1-4] | 64 | 2:16:2 | 128642 | gpuram48G gpu_cc8.6 | NVIDIA A40 | | ||
+ | | tdll-8gpu[1, | ||
+ | | tdll-8gpu[3-7] | 32 | 2:8:2 | 253725 | gpuram16G gpu_cc7.5 | NVIDIA Quadro P5000 | | ||
==== gpu-ms ==== | ==== gpu-ms ==== | ||
+ | |||
+ | | Node name | Thread count | Socket: | ||
+ | | dll-3gpu[1-5] | 64 | 2:16:2 | 128642 | gpuram48G gpu_cc8.6 | NVIDIA A40 | | ||
+ | | dll-4gpu[1, | ||
+ | | dll-8gpu[1, | ||
+ | | dll-8gpu[3, | ||
+ | | dll-8gpu[5, | ||
+ | | dll-10gpu1 | 32 | 2:8:2 | 257830 | gpuram16G gpu_cc8.6 | NVIDIA RTX A4000 | | ||
+ | | dll-10gpu[2, | ||
+ | |||
+ | |||
+ | ==== Submit nodes ==== | ||
+ | |||
In order to submit a job you need to log in to one of the head nodes: | In order to submit a job you need to log in to one of the head nodes: | ||
Line 19: | Line 47: | ||
| | ||
| | ||
+ | | ||
+ | | ||
+ | | ||
+ | | ||
===== Basic usage ===== | ===== Basic usage ===== | ||
Line 33: | Line 65: | ||
#!/bin/bash | #!/bin/bash | ||
#SBATCH -J helloWorld | #SBATCH -J helloWorld | ||
- | #SBATCH -p cpu-troja | + | #SBATCH -p cpu-troja |
#SBATCH -o helloWorld.out | #SBATCH -o helloWorld.out | ||
#SBATCH -e helloWorld.err | #SBATCH -e helloWorld.err | ||
Line 48: | Line 80: | ||
#SBATCH -D / | #SBATCH -D / | ||
#SBATCH -N 2 # number of nodes (default 1) | #SBATCH -N 2 # number of nodes (default 1) | ||
- | #SBATCH --nodelist=node1, | + | #SBATCH --nodelist=node1, |
#SBATCH --cpus-per-task=4 | #SBATCH --cpus-per-task=4 | ||
#SBATCH --gres=gpu: | #SBATCH --gres=gpu: | ||
Line 67: | Line 99: | ||
< | < | ||
man sbatch | man sbatch | ||
+ | </ | ||
+ | |||
+ | === Rudolf' | ||
+ | |||
+ | The main point of this template is that the log file names automatically include the job name and the job ID. | ||
+ | |||
+ | < | ||
+ | #SBATCH -J RuRjob | ||
+ | #SBATCH -o %x.%j.out | ||
+ | #SBATCH -e %x.%j.err | ||
+ | #SBATCH -p gpu-troja | ||
+ | #SBATCH --gres=gpu: | ||
+ | #SBATCH --mem=16G | ||
+ | #SBATCH --constraint=" | ||
+ | |||
+ | # Print each command to STDERR before executing (expanded), prefixed by "+ " | ||
+ | set -o xtrace | ||
</ | </ | ||
Line 166: | Line 215: | ||
* '' | * '' | ||
- | * '' | + | * '' |
**To get an interactive job with a single GPU of any kind:** | **To get an interactive job with a single GPU of any kind:** | ||
< | < | ||
* '' | * '' | ||
- | * '' | + | * '' |
< | < | ||
* '' | * '' | ||
- | * '' | + | * '' |
- | * '' | + | * Note that e.g. '' |
+ | * '' | ||
< | < | ||
- | * '' | + | * '' |
+ | |||
+ | ==== Delete Job ==== | ||
+ | < | ||
To see all the available options type: | To see all the available options type: |