Skip to content

Commit 76791f4

Browse files
authored
Merge pull request #281 from intel/develop
Develop
2 parents b57fda6 + e89fdab commit 76791f4

File tree

11 files changed

+71
-56
lines changed

11 files changed

+71
-56
lines changed

README.md

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -48,11 +48,14 @@ and Intel(R) graphics processors.
4848
to make the generated code easier for humans to read.
4949
You'll get a warning when running `make` if one of these doesn't exist.
5050
Everything will still work, but the generated code will be difficult to read.
51-
Reading the generated code is only necessary for debug or curiosity.
51+
Reading the generated code is only necessary for debugging, performance analysis, etc.
5252
* SWIG (4.0.0 or later):
5353
http://www.swig.org, for creating the Python interface.
5454
* Python 3 (3.6.1 or later):
5555
https://www.python.org/downloads, for creating and using the Python interface.
56+
Included with Intel(R) oneAPI HPC Toolkit.
57+
* Python `numpy` package for running Python interface tests.
58+
Included with Intel(R) oneAPI HPC Toolkit.
5659
* Doxygen (1.9.0 or later):
5760
https://www.doxygen.nl, for creating updated API documentation.
5861
If you're not changing the API documentation, you can view the existing documentation

docs/YASK-tutorial.pdf

38.2 KB
Binary file not shown.

src/common/common_utils.cpp

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,7 @@ namespace yask {
4444
// for numbers above 9 (at least up to 99).
4545

4646
// Format: "major.minor.patch[-alpha|-beta]".
47-
const string version = "4.04.04";
47+
const string version = "4.04.05";
4848

4949
string yask_get_version_string() {
5050
return version;
@@ -57,10 +57,12 @@ namespace yask {
5757

5858
// Return num with SI multiplier and "iB" suffix,
5959
// e.g., 412KiB.
60+
// Use only for storage sizes in bytes; do not use for
61+
// rates like bytes/sec.
6062
string make_byte_str(size_t nbytes)
6163
{
6264
if (!is_suffix_print_enabled)
63-
return to_string(nbytes);
65+
return to_string(nbytes) + " Bytes";
6466

6567
ostringstream os;
6668
double num = double(nbytes);
@@ -89,7 +91,8 @@ namespace yask {
8991
}
9092

9193
// Return num with SI multiplier, e.g. "3.14M".
92-
// Use this one for rates, etc.
94+
// Use this one for printing any number that is
95+
// not a number of storage bytes.
9396
string make_num_str(idx_t num) {
9497
if (!is_suffix_print_enabled || (num > -1000 && num < 1000))
9598
return to_string(num);

src/common/tuple.cpp

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -202,8 +202,8 @@ namespace yask {
202202
size_t prev_size = 1;
203203

204204
// Loop thru dims.
205-
int start_dim = _first_inner ? 0 : size()-1;
206-
int end_dim = _first_inner ? size() : -1;
205+
int start_dim = _first_inner ? 0 : get_num_dims()-1;
206+
int end_dim = _first_inner ? get_num_dims() : -1;
207207
int step_dim = _first_inner ? 1 : -1;
208208
for (int di = start_dim; di != end_dim; di += step_dim) {
209209
auto& i = _q.at(di);
@@ -242,8 +242,8 @@ namespace yask {
242242
size_t prev_size = 1;
243243

244244
// Loop thru dims.
245-
int start_dim = _first_inner ? 0 : size()-1;
246-
int stop_dim = _first_inner ? size() : -1;
245+
int start_dim = _first_inner ? 0 : get_num_dims()-1;
246+
int stop_dim = _first_inner ? get_num_dims() : -1;
247247
int step_dim = _first_inner ? 1 : -1;
248248
for (int di = start_dim; di != stop_dim; di += step_dim) {
249249
auto& i = _q.at(di);

src/compiler/Makefile

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -62,8 +62,8 @@ YC_CXX := $(CXX)
6262
YC_CXXOPT ?= -O2
6363
YC_CXXDBG ?= -g
6464
YC_CXXFLAGS :=
65-
YC_CXXFLAGS_API := -std=c++17
66-
YC_CXXFLAGS_API += -Wall -Wno-unknown-pragmas -Wno-unused-variable
65+
YC_CXXWARN := -Wall -Wno-unknown-pragmas -Wno-unused-variable
66+
YC_CXXFLAGS_API := -std=c++17 $(YC_CXXWARN)
6767
YC_CXX_INCFLAGS := $(addprefix -I,$(YC_INC_DIRS))
6868
YC_CXX_INCFLAGS_API := $(addprefix -I,$(INC_DIR))
6969

src/kernel/Makefile

Lines changed: 20 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -31,20 +31,23 @@
3131
YASK_BASE ?= $(abspath ../..)
3232
include $(YASK_BASE)/src/common/common.mk
3333

34-
# Initial default settings for the YASK kernel library.
34+
# Default settings for invoking the YASK compiler.
3535
# These can be overridden on the 'make' command-line.
36-
# See src/common/common.mk for more setting vars.
37-
numa ?= 1
38-
allow_new_var_types ?= 1
39-
streaming_stores ?= 0
40-
use_rcp ?= 0
4136
use_ptrs ?= 1
4237
use_safe_ptrs ?= 0
4338
outer_domain_layout ?= 0
4439
inner_misc_layout ?= 1
4540
first_inner ?= 1
4641
early_loads ?= 0
4742
min_buffer_len ?= 1
43+
44+
# Default settings for the YASK kernel library.
45+
# These can be overridden on the 'make' command-line.
46+
# See src/common/common.mk for more setting vars.
47+
numa ?= 1
48+
allow_new_var_types ?= 1
49+
streaming_stores ?= 0
50+
use_rcp ?= 0
4851
trace ?= 0
4952
trace_mem ?= 0
5053
check ?= 0
@@ -56,9 +59,7 @@ check ?= 0
5659

5760
# YASK compiler settings for offload.
5861
ifeq ($(offload),1)
59-
pfd_l1 := 0
60-
pfd_l2 := 0
61-
inner_loop_dim := 1
62+
inner_loop_dim := 1
6263
outer_domain_layout := 1
6364
endif
6465

@@ -87,7 +88,7 @@ ifeq ($(TARGET),knl)
8788
VEC_MACROS += USE_RCP28
8889
endif
8990
MACROS += NUMA_PREF=1
90-
pfd_l1 ?= 1
91+
pfd_l1 := 1
9192

9293
else ifeq ($(TARGET),avx512)
9394

@@ -156,18 +157,18 @@ endif
156157
ifneq ($(step_dim),)
157158
YC_FLAGS += -step-dim $(step_dim)
158159
endif
159-
ifneq ($(pfd_l1),)
160-
YC_FLAGS += -l1-prefetch-dist $(pfd_l1)
161-
endif
162-
ifneq ($(pfd_l2),)
163-
YC_FLAGS += -l2-prefetch-dist $(pfd_l2)
164-
endif
165160
ifneq ($(inner_loop_dim),)
166161
YC_FLAGS += -inner-loop-dim $(inner_loop_dim)
167162
endif
168163
ifneq ($(min_buffer_len),)
169164
YC_FLAGS += -min-buffer-len $(min_buffer_len)
170165
endif
166+
ifneq ($(pfd_l1),)
167+
YC_FLAGS += -l1-prefetch-dist $(pfd_l1)
168+
endif
169+
ifneq ($(pfd_l2),)
170+
YC_FLAGS += -l2-prefetch-dist $(pfd_l2)
171+
endif
171172

172173
# Stencil compiler flags that are boolean.
173174
ifeq ($(use_ptrs),1)
@@ -351,7 +352,7 @@ endif
351352
# Compiler-specific settings.
352353

353354
# Create a compiler invocation to test for macro settings.
354-
YK_CXX_TEST := $(YK_CXX)
355+
YK_CXX_TEST := $(YK_CXXCMD)
355356
cxx_is_llvm_intel := $(call MACRO_DEF,$(YK_CXX_TEST),__INTEL_LLVM_COMPILER)
356357
cxx_is_clang := $(call MACRO_DEF,$(YK_CXX_TEST),__clang__)
357358
cxx_is_intel := $(call MACRO_DEF,$(YK_CXX_TEST),__INTEL_COMPILER)
@@ -512,7 +513,8 @@ NANO_BLOCK_LOOP_ORDER ?= DOMAIN_LOOP_DIMS
512513
ifeq ($(offload),1)
513514
NANO_BLOCK_LOOP_OMP ?= omp target teams distribute thread_limit(thread_limit) device(KernelEnv::_omp_devn)
514515
NANO_BLOCK_LOOP_FLAGS += -omp '$(NANO_BLOCK_LOOP_OMP)'
515-
NANO_BLOCK_LOOP_CODE := $(NANO_BLOCK_LOOP_MODS) omp loop($(NANO_BLOCK_LOOP_ORDER)) { }
516+
NANO_BLOCK_LOOP_MODS += omp
517+
NANO_BLOCK_LOOP_CODE := $(NANO_BLOCK_LOOP_MODS) loop($(NANO_BLOCK_LOOP_ORDER)) { }
516518
else
517519
NANO_BLOCK_LOOP_CODE := $(NANO_BLOCK_LOOP_MODS) loop($(NANO_BLOCK_LOOP_ORDER)) { }
518520
endif

src/kernel/lib/alloc.cpp

Lines changed: 20 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -103,10 +103,14 @@ namespace yask {
103103
else if (numa_available() != -1) {
104104
numa_set_bind_policy(0);
105105
if (numa_pref >= 0 && numa_pref <= numa_max_node())
106-
numa_alloc_onnode(nbytes, numa_pref);
106+
p = numa_alloc_onnode(nbytes, numa_pref);
107107
else
108-
numa_alloc_local(nbytes);
108+
p = numa_alloc_local(nbytes);
109109
// Interleaved not available.
110+
111+
if (!p)
112+
THROW_YASK_EXCEPTION("cannot allocate " + make_byte_str(nbytes) +
113+
" using numa-node (or policy) " + to_string(numa_pref));
110114
}
111115
else
112116
THROW_YASK_EXCEPTION("explicit NUMA policy allocation is not available");
@@ -158,15 +162,6 @@ namespace yask {
158162

159163
#endif // not USE_NUMA_POLICY_LIB.
160164

161-
#else
162-
THROW_YASK_EXCEPTION("NUMA allocation is not enabled; build with numa=1");
163-
#endif // USE_NUMA.
164-
165-
// Should not get here w/null p; throw exception.
166-
if (!p)
167-
THROW_YASK_EXCEPTION("cannot allocate " + make_byte_str(nbytes) +
168-
" using numa-node (or policy) " + to_string(numa_pref));
169-
170165
// Check alignment.
171166
if ((size_t(p) & (CACHELINE_BYTES - 1)) != 0)
172167
FORMAT_AND_THROW_YASK_EXCEPTION("NUMA-allocated " << p << " is not " <<
@@ -177,6 +172,10 @@ namespace yask {
177172

178173
// Return as a char* as required for shared_ptr ctor.
179174
return static_cast<char*>(p);
175+
176+
#else
177+
THROW_YASK_EXCEPTION("NUMA allocation is not enabled; build with numa=1");
178+
#endif // USE_NUMA.
180179
}
181180

182181
// Reverse numa_alloc().
@@ -217,38 +216,36 @@ namespace yask {
217216

218217
void *p = 0;
219218

219+
#ifdef USE_OFFLOAD
220+
THROW_YASK_EXCEPTION("mapping offload device memory to shm not yet supported; "
221+
"use '-no-use_shm' option");
222+
220223
// Allocate using MPI shm.
221-
#ifdef USE_MPI
224+
#elif defined(USE_MPI)
222225
assert(shm_comm);
223226
assert(shm_win);
224227
MPI_Info win_info;
225228
MPI_Info_create(&win_info);
226229
MPI_Info_set(win_info, "alloc_shared_noncontig", "true");
227230
MPI_Win_allocate_shared(nbytes, 1, win_info, *shm_comm, &p, shm_win);
228-
MPI_Info_free(&win_info);
229-
MPI_Win_lock_all(0, *shm_win);
230-
#else
231-
THROW_YASK_EXCEPTION("MPI shm allocation is not enabled; build with mpi=1");
232-
#endif
233-
234231
if (!p)
235232
THROW_YASK_EXCEPTION("cannot allocate " + make_byte_str(nbytes) +
236233
" using MPI shm");
234+
MPI_Info_free(&win_info);
235+
MPI_Win_lock_all(0, *shm_win);
237236

238237
// Check alignment.
239238
if ((size_t(p) & (CACHELINE_BYTES - 1)) != 0)
240239
FORMAT_AND_THROW_YASK_EXCEPTION("MPI shm-allocated " << p << " is not " <<
241240
CACHELINE_BYTES << "-byte aligned");
242241

243-
#ifdef USE_OFFLOAD
244-
THROW_YASK_EXCEPTION("mapping offload device memory to shm not yet supported; "
245-
"use '-no-use_shm'");
246-
#endif
247-
248242
// Cannot typically use huge pages for shm, so not calling set_huge().
249243

250244
// Return as a char* as required for shared_ptr ctor.
251245
return static_cast<char*>(p);
246+
#else
247+
THROW_YASK_EXCEPTION("MPI shm allocation is not enabled; build with mpi=1");
248+
#endif
252249
}
253250

254251
// Reverse shm_alloc().

src/kernel/lib/auto_tuner.cpp

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -431,8 +431,6 @@ namespace yask {
431431
}
432432
} // beyond next neighbor of center.
433433
} // while(true) search for new setting to try.
434-
435-
THROW_YASK_EXCEPTION("(internal fault) exited from infinite loop");
436434
} // eval.
437435

438436
// Apply best settings if avail, and adjust other settings.

src/kernel/lib/indices.hpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -544,13 +544,15 @@ namespace yask {
544544
static_assert(std::is_trivially_copyable<Indices>::value,
545545
"Needed for OpenMP offload");
546546

547+
#if 0
547548
// Define OMP reductions on Indices.
548549
#pragma omp declare reduction(min_idxs : Indices : \
549550
omp_out = omp_out.min_elements(omp_in) ) \
550551
initializer (omp_priv = omp_orig)
551552
#pragma omp declare reduction(max_idxs : Indices : \
552553
omp_out = omp_out.max_elements(omp_in) ) \
553554
initializer (omp_priv = omp_orig)
555+
#endif
554556

555557
// Layout base class.
556558
// This class hierarchy is NOT virtual.

src/kernel/yask.sh

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -181,6 +181,7 @@ while true; do
181181
echo " -v"
182182
echo " Shortcut for the following options:"
183183
echo " $val"
184+
echo " Adds '/tests' to path of log_dir."
184185
echo " If you want to override any of these values, place them after '-v'."
185186
echo " -show_arch"
186187
echo " Print the default architecture string and exit."
@@ -357,6 +358,9 @@ dump="head -v -n -0"
357358
# Init log file.
358359
: ${logfile:=yask.$stencil.$arch.$exe_host.n$nnodes.r$nranks.`date +%Y-%m-%d_%H-%M-%S`_p$$.log}
359360
if [[ -n "$logdir" ]]; then
361+
if [[ $doval == 1 ]]; then
362+
logdir="$logdir/tests"
363+
fi
360364
logfile="$logdir/$logfile"
361365
fi
362366
echo "Writing log to '$logfile'."

0 commit comments

Comments
 (0)