def_arch=intel64
fi
arch=$def_arch
+ arch_is_def_cpu=1  # arch has been set for CPU default only.

# Default nodes.
nnodes=1
- if [[ ! -z ${SLURM_JOB_NUM_NODES:+x} ]]; then
-     nnodes=$SLURM_JOB_NUM_NODES
+ if [[ ! -z ${SLURM_NNODES:+x} ]]; then
+     nnodes=$SLURM_NNODES
fi

+ # Default type and number of GPUs.
+ ngpus=0
+ arch_offload="offload"
+ if command -v xpu-smi > /dev/null; then
+     arch_offload="$def_arch.offload-spir64"
+     ngpus=`xpu-smi topology -m | grep -c '^GPU'`
+ elif command -v nvidia-smi > /dev/null; then
+     arch_offload="$def_arch.offload-nv"
+     ngpus=`nvidia-smi topo -m | grep -c '^GPU'`
+ fi
+ is_offload=0
+
# Default MPI ranks.
# Try Slurm var, then numactl, then lscpu.
# For latter two, the goal is to count only NUMA nodes with CPUs.
# (Systems with HBM may have NUMA nodes without CPUs.)
- nranks=1
+ nranks=$nnodes
+ nranks_is_def_cpu=1  # nranks has been set for CPU default only.
if [[ ! -z ${SLURM_NTASKS:+x} && $SLURM_NTASKS > $nnodes ]]; then
    nranks=$SLURM_NTASKS
+     nranks_is_def_cpu=0
elif command -v numactl > /dev/null; then
    ncpubinds=`numactl -s | awk '/^cpubind:/ { print NF-1 }'`
    if [[ -n "$ncpubinds" ]]; then
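For reference, the GPU-count heuristic above simply counts the rows that begin with 'GPU' in the topology matrix printed by xpu-smi or nvidia-smi. A minimal stand-alone sketch of the same check, useful for seeing which offload defaults a given host would get (it assumes the intel64 CPU default and reports zero GPUs if neither tool is on the PATH, just as the script does):

    # Mirror of the detection logic above, runnable on its own.
    ngpus=0; arch_offload="offload"
    if command -v xpu-smi > /dev/null; then
        arch_offload="intel64.offload-spir64"
        ngpus=`xpu-smi topology -m | grep -c '^GPU'`
    elif command -v nvidia-smi > /dev/null; then
        arch_offload="intel64.offload-nv"
        ngpus=`nvidia-smi topo -m | grep -c '^GPU'`
    fi
    echo "offload arch: $arch_offload, GPUs detected: $ngpus"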
@@ -79,10 +94,15 @@ elif command -v lscpu >/dev/null; then
        nranks=$(( $nnumas * $nnodes ))
    fi
fi
+ nranks_offload=$nnodes
+ if [[ $ngpus > 0 ]]; then
+     nranks_offload=$(( $ngpus * $nnodes ))
+ fi
+ force_mpi=0

# Other defaults.
- pre_cmd=":"
- post_cmd=""
+ pre_cmd=":"  # "colon" command is a no-op.
+ post_cmd=":"
helping=0
opts=""
bindir=`dirname $0`
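The CPU rank default shown partially above comes from the 'cpubind:' line of `numactl -s`, which lists the NUMA nodes the current shell may run on; the awk expression NF-1 counts those nodes. A stand-alone sketch of that count, assuming numactl is installed:

    # Count NUMA nodes with CPUs, as used for the default number of MPI ranks per node.
    ncpubinds=`numactl -s | awk '/^cpubind:/ { print NF-1 }'`
    echo "NUMA nodes in cpubind set: ${ncpubinds:-unknown}"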
@@ -127,13 +147,17 @@ while true; do
echo " This will run the YASK executable with the '-help' option."
echo " -arch <name>"
echo " Specify the architecture-name part of the YASK executable."
- echo " Overrides the default architecture determined from /proc/cpuinfo flags."
- echo " The default arch for this host is '$def_arch'."
+ echo " Overrides the default architecture determined from /proc/cpuinfo flags"
+ echo " or the GPU system management software for offload kernels."
echo " Should correspond to arch=<name> used during compilation"
- echo " with '.offload-<offload_arch>' appended when built with 'offload=1',"
+ echo " with '.offload-<offload_arch>' appended if built with 'offload=1',"
echo " or YK_ARCH=<name> if that was used to override the default."
echo " In any case, the '-stencil' and '-arch' args required to launch"
echo " any executable are printed at the end of a successful compilation."
+ echo " The default for this host is '$def_arch' for CPU kernels"
+ echo " and '$arch_offload' for offload kernels."
+ echo " -offload"
+ echo " Use an offloaded YASK kernel executable, built with 'offload=1'."
echo " -host <hostname>"
echo " Specify host to run YASK executable on."
echo " Run sub-shell under 'ssh <hostname>'."
@@ -149,24 +173,27 @@ while true; do
echo " Run YASK executable as an argument to <command>, e.g., 'numactl -N 0'."
echo " -mpi_cmd <command>"
echo " Run YASK executable as an argument to <command>, e.g., 'mpiexec.hydra -n 4'."
- echo " If -mpi_cmd is used, the -ranks option is used only for computing the"
- echo " default number of OpenMP threads to use."
echo " If -mpi_cmd and -exe_prefix are both specified, this one is used first."
- echo " -ranks <N>"
- echo " Run the YASK executable on <N> MPI ranks."
- echo " Shortcut for the following option if <N> > 1:"
- echo " -mpi_cmd 'mpirun -np <N>'"
- echo " If a different MPI command is needed, use -mpi_cmd <command> explicitly."
- echo " If the env var SLURM_NTASKS is set AND if it greater than the number of nodes,"
- echo " the default is its value."
- echo " Otherwise, the default is based on the number of NUMA nodes on the current host."
- echo " The current default is $nranks."
+ echo " The default command is based on the number of nodes and ranks (see below)."
+ echo " -force_mpi"
+ echo " Generate a default 'mpirun' prefix even if there is only 1 rank to run."
echo " -nodes <N>"
echo " Set the number of nodes."
- echo " This is used to compute the default number of OpenMP threads to use per rank."
- echo " If the env var SLURM_JOB_NUM_NODES is set, the default is its value."
+ echo " If the env var SLURM_NNODES is set, the default is its value."
echo " Otherwise, the default is one (1)."
echo " The current default is $nnodes."
+ echo " -ranks <R>"
+ echo " Run the YASK executable on <R> MPI ranks."
+ echo " This value, along with the number of nodes, <N>, is used to set these defaults:"
+ echo " - Number of MPI ranks per node to <R>/<N>."
+ echo " - Number of OpenMP threads per rank based on core count (for CPU kernels only)."
+ echo " - Default MPI command to 'mpirun -np <R> -ppn <R>/<N>'."
+ echo " If a different MPI command is needed, use -mpi_cmd <command> explicitly."
+ echo " If the env var SLURM_NTASKS is set AND it is greater than the number of nodes,"
+ echo " the default is its value."
+ echo " Otherwise, the default is based on the number of NUMA nodes on the current host"
+ echo " for CPU kernels or the number of GPUs on the current host for offload kernels."
+ echo " The current default is $nranks for CPU kernels and $nranks_offload for offload kernels."
echo " -pre_cmd <command(s)>"
echo " One or more commands to run before YASK executable."
echo " -post_cmd <command(s)>"
@@ -200,6 +227,7 @@ while true; do
elif [[ "$1" == "-help" ]]; then
    helping=1
    nranks=1
+     nranks_is_def_cpu=0
    logfile='/dev/null'

    # Pass option to executable.
@@ -217,7 +245,15 @@ while true; do

elif [[ "$1" == "-arch" && -n ${2+set} ]]; then
    arch=$2
+     arch_is_def_cpu=0
+     if [[ $arch =~ "offload" ]]; then
+         is_offload=1
+     fi
+     shift
    shift
+
+ elif [[ "$1" == "-offload" ]]; then
+     is_offload=1
    shift

elif [[ "$1" == "-host" && -n ${2+set} ]]; then
@@ -271,9 +307,14 @@ while true; do

elif [[ "$1" == "-ranks" && -n ${2+set} ]]; then
    nranks=$2
+     nranks_is_def_cpu=0
    shift
    shift

+ elif [[ "$1" == "-force_mpi" ]]; then
+     force_mpi=1
+     shift
+
elif [[ "$1" == "-nodes" && -n ${2+set} ]]; then
    nnodes=$2
    shift
@@ -332,9 +373,24 @@ if [[ -z ${stencil:+x} ]]; then
    show_stencils
fi

+ # Offload settings.
+ if [[ $is_offload == 1 ]]; then
+
+     # Heuristics for MPI ranks for offload.
+     if [[ $nranks_is_def_cpu == 1 ]]; then
+         nranks=$nranks_offload
+     fi
+
+     # Heuristics for offload arch.
+     if [[ $arch_is_def_cpu == 1 ]]; then
+         arch=$arch_offload
+     fi
+ fi
+
# Set MPI command default.
- if [[ $nranks > 1 ]]; then
-     : ${mpi_cmd="mpirun -np $nranks"}
+ ppn=$(( $nranks / $nnodes ))
+ if [[ $nranks > 1 || $force_mpi == 1 ]]; then
+     : ${mpi_cmd="mpirun -np $nranks -ppn $ppn"}

    # Add default Intel MPI settings.
    envs+=" I_MPI_PRINT_VERSION=1 I_MPI_DEBUG=5"
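The ': ${mpi_cmd=...}' line above relies on the shell's assign-default expansion: the assignment takes effect only when mpi_cmd is unset, so a value supplied with -mpi_cmd always wins. A small demonstration with hypothetical values:

    unset mpi_cmd; nranks=4; ppn=2
    : ${mpi_cmd="mpirun -np $nranks -ppn $ppn"}
    echo "$mpi_cmd"    # -> mpirun -np 4 -ppn 2 (default applied)

    mpi_cmd="mpiexec.hydra -n 4"
    : ${mpi_cmd="mpirun -np $nranks -ppn $ppn"}
    echo "$mpi_cmd"    # -> mpiexec.hydra -n 4 (explicit setting preserved)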
@@ -375,12 +431,6 @@ tag=$stencil.$arch
make_report="$bindir/../build/yask_kernel.$tag.make-report.txt"
yc_report="$bindir/../build/yask_kernel.$tag.yask_compiler-report.txt"

- # Heuristic to determine if this is an offload kernel.
- is_offload=0
- if [[ $arch =~ "offload" ]]; then
-     is_offload=1
- fi
-
# Double-check that exe exists.
if [[ ! -x $exe ]]; then
    echo "error: '$exe' not found or not executable." | tee -a $logfile
@@ -420,20 +470,14 @@
# Output some vars.
echo "Num nodes:" $nnodes | tee -a $logfile
echo "Num MPI ranks:" $nranks | tee -a $logfile
- echo "Num MPI ranks per node:" $(( $nranks / $nnodes )) | tee -a $logfile
+ echo "Num MPI ranks per node:" $ppn | tee -a $logfile
echo "sh_prefix='$sh_prefix'" | tee -a $logfile
echo "mpi_cmd='$mpi_cmd'" | tee -a $logfile
echo "exe_prefix='$exe_prefix'" | tee -a $logfile
echo "exe='$exe'" | tee -a $logfile
echo "pre_cmd='$pre_cmd'" | tee -a $logfile
echo "post_cmd='$post_cmd'" | tee -a $logfile

- # Output the SLURM env.
- if [[ `env | grep -c SLURM` > 0 ]]; then
-     echo "Slurm vars:" | tee -a $logfile
-     env | grep -E 'SBATCH|SLURM'
- fi
-
# Dump most recent reports.
if [[ -e $make_report ]]; then
    $dump $make_report >> $logfile
@@ -448,34 +492,62 @@ if [[ $doval == 1 ]]; then
fi

# Commands to capture some important system status and config info for benchmark documentation.
- config_cmds="sleep 1; uptime; lscpu; cpuinfo -A; sed '/^$/q' /proc/cpuinfo; cpupower frequency-info; uname -a; $dump /etc/system-release; $dump /proc/cmdline; $dump /proc/meminfo; free -gt; numactl -H; ulimit -a; ipcs -l; module list; env | awk '/YASK/ { print \" env:\", \$1 }'"
+ config_cmds="sleep 1"
+ if command -v module > /dev/null; then
+     config_cmds+="; module list"
+ fi
+ config_cmds+="; echo 'Selected env vars:'; env | sort | awk '/YASK|SLURM|SBATCH|MPI|OMP/ { print \" env:\", \$1 }'"
+
+ config_cmds+="; set -x"  # Start echoing commands before running them.
+ config_cmds+="; uptime; uname -a; ulimit -a; $dump /proc/cmdline; $dump /proc/meminfo; free -gt"
+ if [[ -r /etc/system-release ]]; then
+     config_cmds+="; $dump /etc/system-release"
+ fi
+ if command -v lscpu > /dev/null; then
+     config_cmds+="; lscpu"
+ fi
+ if command -v cpuinfo > /dev/null; then
+     config_cmds+="; cpuinfo -A"
+ fi
+ if command -v cpupower > /dev/null; then
+     config_cmds+="; cpupower frequency-info"
+ fi
+ config_cmds+="; sed '/^$/q' /proc/cpuinfo"
+ if command -v numactl > /dev/null; then
+     config_cmds+="; numactl -H"
+ fi
+ if command -v ipcs > /dev/null; then
+     config_cmds+="; ipcs -l"
+ fi

# Add settings for offload kernel.
if [[ $is_offload == 1 ]]; then
-     config_cmds+="; clinfo -l";
-     if [[ $nranks > 1 ]]; then
-         envs+=" I_MPI_OFFLOAD_TOPOLIB=level_zero I_MPI_OFFLOAD=2"
-     else
-         envs+=" EnableImplicitScaling=1"
+     if command -v clinfo > /dev/null; then
+         config_cmds+="; clinfo -l";
+     fi
+     if command -v xpu-smi > /dev/null; then
+         config_cmds+="; xpu-smi discovery; xpu-smi topology -m";
+     fi
+     if command -v nvidia-smi > /dev/null; then
+         config_cmds+="; nvidia-smi";
+     fi
+     if [[ ! -z "$mpi_cmd" ]]; then
+         envs+=" I_MPI_OFFLOAD=2"
    fi
fi

# Command sequence to be run in a shell.
exe_str="$mpi_cmd $exe_prefix $exe $opts"
- cmds="cd $dir; ulimit -s unlimited; $config_cmds; ldd $exe; date; $pre_cmd; env $envs $envs2 $exe_str"
- if [[ -n "$post_cmd" ]]; then
-     cmds+="; $post_cmd"
- fi
- cmds+="; date"
+ cmds="cd $dir; ulimit -s unlimited; $config_cmds; ldd $exe; date; $pre_cmd; env $envs $envs2 $exe_str; $post_cmd; date"

# Finally, invoke the binary in a shell.
if [[ $dodry == 0 ]]; then
    echo "===================" | tee -a $logfile
    if [[ -z "$sh_prefix" ]]; then
-         sh -c -x "$cmds" 2>&1 | tee -a $logfile
+         sh -c "$cmds" 2>&1 | tee -a $logfile
    else
        echo "Running shell under '$sh_prefix'..."
-         $sh_prefix "sh -c -x '$cmds'" 2>&1 | tee -a $logfile
+         $sh_prefix "sh -c '$cmds'" 2>&1 | tee -a $logfile
    fi
    echo "===================" | tee -a $logfile
fi
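The rewritten status-capture block follows one pattern throughout: append each reporting command to config_cmds only when `command -v` finds the corresponding tool, so hosts that lack a utility simply skip it instead of logging an error. The same pattern in isolation, with a hypothetical tool list:

    config_cmds="sleep 1"
    for tool in lscpu numactl ipcs; do
        if command -v "$tool" > /dev/null; then
            config_cmds+="; echo '$tool is available'"
        fi
    done
    sh -c "$config_cmds"   # runs only the pieces whose tools exist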