This repository was archived by the owner on May 27, 2021. It is now read-only.

Commit ff3b34e

Add toggle for contextualization, and disable for tests relying on names.
1 parent 39c4b7a commit ff3b34e

5 files changed: +24 -23 lines
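Usage note: the new keyword is accepted both by the `@cuda` macro and by `cufunction`, as the diffs below show. A minimal sketch of the intended use (the `dummy` kernel is borrowed from the test suite; a working CUDA setup is assumed):

    using CUDAnative

    dummy() = return

    @cuda dummy()                        # default: the kernel is contextualized through Cassette
    @cuda contextualize=false dummy()    # opt out, keeping the original symbol names

    k = cufunction(dummy; contextualize=false)   # same toggle via the functional interface
    k()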

src/compiler/common.jl

Lines changed: 2 additions & 0 deletions
@@ -7,6 +7,8 @@ Base.@kwdef struct CompilerJob
     cap::VersionNumber
     kernel::Bool
 
+    contextualize::Bool = true
+
     # optional properties
     minthreads::Union{Nothing,CuDim} = nothing
     maxthreads::Union{Nothing,CuDim} = nothing
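Since `CompilerJob` is a `Base.@kwdef` struct, the new field gets its default of `true` without touching any existing construction site; only callers that explicitly pass `contextualize=false` change behavior. A self-contained sketch of that mechanism (illustrative struct, not the real `CompilerJob`):

    # Base.@kwdef generates a keyword constructor that fills in defaulted fields.
    Base.@kwdef struct JobSketch
        kernel::Bool
        contextualize::Bool = true
    end

    JobSketch(kernel=true)                       # contextualize == true
    JobSketch(kernel=true, contextualize=false)  # explicit opt-out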

src/compiler/driver.jl

Lines changed: 1 addition & 1 deletion
@@ -62,7 +62,7 @@ function codegen(target::Symbol, job::CompilerJob;
     @timeit_debug to "validation" check_method(job)
 
     @timeit_debug to "Julia front-end" begin
-        f = contextualize(job.f)
+        f = job.contextualize ? contextualize(job.f) : job.f
 
         # get the method instance
         world = typemax(UInt)
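For context on what the branch above skips: `contextualize(job.f)` wraps the user's function so that its execution is traced through a Cassette context, which is what lets CUDAnative substitute GPU-friendly methods during tracing. A rough sketch of that kind of wrapper, assuming Cassette's `overdub` API (the context name and the one-liner are illustrative, not CUDAnative's actual implementation):

    using Cassette

    Cassette.@context SketchCtx    # hypothetical context, for illustration only

    # every call made by `f` is re-dispatched through the context,
    # so selected methods can be overridden during tracing
    contextualize_sketch(f) = (args...) -> Cassette.overdub(SketchCtx(), f, args...)

    contextualize_sketch(sin)(1.0)   # same result as sin(1.0), but traced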

src/execution.jl

Lines changed: 2 additions & 1 deletion
@@ -9,7 +9,7 @@ export @cuda, cudaconvert, cufunction, dynamic_cufunction, nearest_warpsize
 # the code it generates, or the execution
 function split_kwargs(kwargs)
     macro_kws = [:dynamic]
-    compiler_kws = [:minthreads, :maxthreads, :blocks_per_sm, :maxregs, :name]
+    compiler_kws = [:minthreads, :maxthreads, :blocks_per_sm, :maxregs, :name, :contextualize]
     call_kws = [:cooperative, :blocks, :threads, :config, :shmem, :stream]
     macro_kwargs = []
     compiler_kwargs = []
@@ -351,6 +351,7 @@ The following keyword arguments are supported:
 - `maxregs`: the maximum number of registers to be allocated to a single thread (only
   supported on LLVM 4.0+)
 - `name`: override the name that the kernel will have in the generated code
+- `contextualize`: whether to contextualize functions using Cassette (default: true)
 
 The output of this function is automatically cached, i.e. you can simply call `cufunction`
 in a hot path without degrading performance. New code will be generated automatically, when
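The `split_kwargs` change is what routes `contextualize=false` from a `@cuda` invocation to the compiler: keyword expressions are partitioned by name into macro, compiler, and call groups, and `:contextualize` now falls into the compiler group that ends up in `CompilerJob`. A condensed, illustrative version of that partitioning (not the actual function body; the macro-keyword group is omitted):

    function split_kwargs_sketch(kwargs)
        compiler_kws = [:minthreads, :maxthreads, :blocks_per_sm, :maxregs, :name, :contextualize]
        call_kws     = [:cooperative, :blocks, :threads, :config, :shmem, :stream]
        compiler_kwargs, call_kwargs = Expr[], Expr[]
        for kwarg in kwargs          # each kwarg arrives as a `key = value` expression
            key = kwarg.args[1]
            if key in compiler_kws
                push!(compiler_kwargs, kwarg)
            elseif key in call_kws
                push!(call_kwargs, kwarg)
            else
                error("unrecognized keyword argument '$key'")
            end
        end
        return compiler_kwargs, call_kwargs
    end

    split_kwargs_sketch([:(threads = 4), :(contextualize = false)])
    # -> ([:(contextualize = false)], [:(threads = 4)])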

test/codegen.jl

Lines changed: 15 additions & 17 deletions
@@ -8,7 +8,8 @@
 valid_kernel() = return
 invalid_kernel() = 1
 
-ir = sprint(io->CUDAnative.code_llvm(io, valid_kernel, Tuple{}; optimize=false, dump_module=true))
+ir = sprint(io->CUDAnative.code_llvm(io, valid_kernel, Tuple{}; dump_module=true,
+                                     contextualize=false, optimize=false))
 
 # module should contain our function + a generic call wrapper
 @test occursin("define void @julia_valid_kernel", ir)
@@ -21,11 +22,6 @@
 @test_throws CUDAnative.KernelError CUDAnative.code_llvm(devnull, invalid_kernel, Tuple{}; kernel=true) == nothing
 end
 
-@testset "unbound typevars" begin
-    invalid_kernel() where {unbound} = return
-    @test_throws CUDAnative.KernelError CUDAnative.code_llvm(devnull, invalid_kernel, Tuple{})
-end
-
 @testset "exceptions" begin
     foobar() = throw(DivideError())
     ir = sprint(io->CUDAnative.code_llvm(io, foobar, Tuple{}))
@@ -52,7 +48,7 @@ end
 @noinline child(i) = sink(i)
 parent(i) = child(i)
 
-ir = sprint(io->CUDAnative.code_llvm(io, parent, Tuple{Int}))
+ir = sprint(io->CUDAnative.code_llvm(io, parent, Tuple{Int}; contextualize=false))
 @test occursin(r"call .+ @julia_child_", ir)
 end
 
@@ -76,10 +72,10 @@ end
 x::Int
 end
 
-ir = sprint(io->CUDAnative.code_llvm(io, kernel, Tuple{Aggregate}))
+ir = sprint(io->CUDAnative.code_llvm(io, kernel, Tuple{Aggregate}; contextualize=false))
 @test occursin(r"@julia_kernel_\d+\(({ i64 }|\[1 x i64\]) addrspace\(\d+\)?\*", ir)
 
-ir = sprint(io->CUDAnative.code_llvm(io, kernel, Tuple{Aggregate}; kernel=true))
+ir = sprint(io->CUDAnative.code_llvm(io, kernel, Tuple{Aggregate}; contextualize=false, kernel=true))
 @test occursin(r"@ptxcall_kernel_\d+\(({ i64 }|\[1 x i64\])\)", ir)
 end
 
@@ -135,7 +131,7 @@ end
 closure = ()->return
 
 function test_name(f, name; kwargs...)
-    code = sprint(io->CUDAnative.code_llvm(io, f, Tuple{}; kwargs...))
+    code = sprint(io->CUDAnative.code_llvm(io, f, Tuple{}; contextualize=false, kwargs...))
     @test occursin(name, code)
 end
 
@@ -221,7 +217,7 @@ end
 return
 end
 
-asm = sprint(io->CUDAnative.code_ptx(io, parent, Tuple{Int64}))
+asm = sprint(io->CUDAnative.code_ptx(io, parent, Tuple{Int64}; contextualize=false))
 @test occursin(r"call.uni\s+julia_child_"m, asm)
 end
 
@@ -232,7 +228,7 @@ end
 return
 end
 
-asm = sprint(io->CUDAnative.code_ptx(io, entry, Tuple{Int64}; kernel=true))
+asm = sprint(io->CUDAnative.code_ptx(io, entry, Tuple{Int64}; contextualize=false, kernel=true))
 @test occursin(r"\.visible \.entry ptxcall_entry_", asm)
 @test !occursin(r"\.visible \.func julia_nonentry_", asm)
 @test occursin(r"\.func julia_nonentry_", asm)
@@ -279,15 +275,15 @@ end
 return
 end
 
-asm = sprint(io->CUDAnative.code_ptx(io, parent1, Tuple{Int}))
+asm = sprint(io->CUDAnative.code_ptx(io, parent1, Tuple{Int}; contextualize=false))
 @test occursin(r".func julia_child_", asm)
 
 function parent2(i)
     child(i+1)
     return
 end
 
-asm = sprint(io->CUDAnative.code_ptx(io, parent2, Tuple{Int}))
+asm = sprint(io->CUDAnative.code_ptx(io, parent2, Tuple{Int}; contextualize=false))
 @test occursin(r".func julia_child_", asm)
 end
 
@@ -357,7 +353,7 @@ end
 closure = ()->nothing
 
 function test_name(f, name; kwargs...)
-    code = sprint(io->CUDAnative.code_ptx(io, f, Tuple{}; kwargs...))
+    code = sprint(io->CUDAnative.code_ptx(io, f, Tuple{}; contextualize=false, kwargs...))
     @test occursin(name, code)
 end
 
@@ -429,7 +425,7 @@ end
 return
 end
 
-ir = sprint(io->CUDAnative.code_llvm(io, kernel, Tuple{Float32,Ptr{Float32}}))
+ir = sprint(io->CUDAnative.code_llvm(io, kernel, Tuple{Float32,Ptr{Float32}}; contextualize=false))
 @test occursin("jl_box_float32", ir)
 CUDAnative.code_ptx(devnull, kernel, Tuple{Float32,Ptr{Float32}})
 end
@@ -444,18 +440,20 @@ end
 
 # some validation happens in the emit_function hook, which is called by code_llvm
 
+# NOTE: contextualization changes order of frames
 @testset "recursion" begin
     @eval recurse_outer(i) = i > 0 ? i : recurse_inner(i)
     @eval @noinline recurse_inner(i) = i < 0 ? i : recurse_outer(i)
 
-    @test_throws_message(CUDAnative.KernelError, CUDAnative.code_llvm(devnull, recurse_outer, Tuple{Int})) do msg
+    @test_throws_message(CUDAnative.KernelError, CUDAnative.code_llvm(devnull, recurse_outer, Tuple{Int}; contextualize=false)) do msg
         occursin("recursion is currently not supported", msg) &&
         occursin("[1] recurse_outer", msg) &&
         occursin("[2] recurse_inner", msg) &&
         occursin("[3] recurse_outer", msg)
     end
 end
 
+# FIXME: contextualization removes all frames here -- changed inlining behavior?
 @testset "base intrinsics" begin
     foobar(i) = sin(i)
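What the test changes above have in common: once a function is run through Cassette, the symbols emitted into LLVM IR and PTX describe the overdub wrapper rather than the original Julia function, so assertions that grep for names like `julia_child_` stop matching. Disabling contextualization just for the reflection call keeps those name-based checks meaningful. A hedged illustration of the failure mode, reusing `parent`/`child` from the child-function testset above (exact mangled names depend on the Julia and Cassette versions):

    ir_plain = sprint(io->CUDAnative.code_llvm(io, parent, Tuple{Int}; contextualize=false))
    ir_ctx   = sprint(io->CUDAnative.code_llvm(io, parent, Tuple{Int}))   # contextualize defaults to true

    occursin(r"call .+ @julia_child_", ir_plain)   # true: original names survive
    occursin(r"call .+ @julia_child_", ir_ctx)     # likely false: the call is routed through the Cassette wrapper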

test/device/execution.jl

Lines changed: 4 additions & 4 deletions
@@ -70,9 +70,9 @@ end
 @test_throws ErrorException @device_code_lowered nothing
 
 # make sure kernel name aliases are preserved in the generated code
-@test occursin("ptxcall_dummy", sprint(io->(@device_code_llvm io=io @cuda dummy())))
-@test occursin("ptxcall_dummy", sprint(io->(@device_code_ptx io=io @cuda dummy())))
-@test occursin("ptxcall_dummy", sprint(io->(@device_code_sass io=io @cuda dummy())))
+@test occursin("ptxcall_dummy", sprint(io->(@device_code_llvm io=io @cuda contextualize=false dummy())))
+@test occursin("ptxcall_dummy", sprint(io->(@device_code_ptx io=io @cuda contextualize=false dummy())))
+@test occursin("ptxcall_dummy", sprint(io->(@device_code_sass io=io @cuda contextualize=false dummy())))
 
 # make sure invalid kernels can be partially reflected upon
 let
@@ -96,7 +96,7 @@ end
 
 # set name of kernel
 @test occursin("ptxcall_mykernel", sprint(io->(@device_code_llvm io=io begin
-    k = cufunction(dummy, name="mykernel")
+    k = cufunction(dummy; name="mykernel", contextualize=false)
     k()
 end)))
 end
