Skip to content
This repository was archived by the owner on May 27, 2021. It is now read-only.

Commit 6f3d8a4

Browse files
authored
Merge branch 'master' into complex-ops
2 parents 345956c + a4a56bd commit 6f3d8a4

24 files changed

+324
-194
lines changed

.gitattributes

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
CITATION.bib linguist-detectable=false
2+
test/perf/* linguist-detectable=false

Project.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
name = "CUDAnative"
22
uuid = "be33ccc6-a3ff-5ff2-a52e-74243cff1e17"
3-
version = "2.2.1"
3+
version = "2.3.1"
44

55
[deps]
66
Adapt = "79e6a3ab-5dfb-504d-930d-738a2a938a0e"

README.md

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@ CUDAnative.jl
33

44
*Support for compiling and executing native Julia kernels on CUDA hardware.*
55

6-
[![][docs-latest-img]][docs-latest-url] [![][codecov-img]][codecov-url] [![][doi-img]][doi-url]
6+
[![][docs-latest-img]][docs-latest-url] [![][discourse-img]][discourse-url] [![][codecov-img]][codecov-url] [![][doi-img]][doi-url]
77

88
[codecov-img]: https://codecov.io/gh/JuliaGPU/CUDAnative.jl/branch/master/graph/badge.svg
99
[codecov-url]: https://codecov.io/gh/JuliaGPU/CUDAnative.jl
@@ -14,6 +14,9 @@ CUDAnative.jl
1414
[doi-img]: https://zenodo.org/badge/DOI/10.1109/TPDS.2018.2872064.svg
1515
[doi-url]: https://doi.org/10.1109/TPDS.2018.2872064
1616

17+
[discourse-img]: https://img.shields.io/badge/discourse-julia%20%23gpu-red
18+
[discourse-url]: https://discourse.julialang.org/c/domain/gpu
19+
1720

1821

1922
Installation

codecov.yml

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,6 @@
11
coverage:
22
ignore:
3-
- "deps/*"
4-
- "src/device/*"
3+
- "src/device"
54
status:
65
patch: false
76
project: false

examples/pairwise.jl

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,9 @@
11
# calculate pairwise distance between every point in a vector
22

33
using CUDAdrv, CUDAnative
4-
include(joinpath(@__DIR__, "..", "test", "array.jl")) # real applications: use CuArrays.jl
4+
5+
include(joinpath(@__DIR__, "..", "test", "array.jl"))
6+
const CuArray = CuTestArray # real applications: use CuArrays.jl
57

68

79
function haversine_cpu(lat1::Float32, lon1::Float32, lat2::Float32, lon2::Float32, radius::Float32)
@@ -78,12 +80,12 @@ end
7880

7981
function pairwise_dist_gpu(lat::Vector{Float32}, lon::Vector{Float32})
8082
# upload
81-
lat_gpu = CuTestArray(lat)
82-
lon_gpu = CuTestArray(lon)
83+
lat_gpu = CuArray(lat)
84+
lon_gpu = CuArray(lon)
8385

8486
# allocate
8587
n = length(lat)
86-
rowresult_gpu = CuTestArray(zeros(Float32, n, n))
88+
rowresult_gpu = CuArray(zeros(Float32, n, n))
8789

8890
# calculate launch configuration
8991
function get_config(kernel)

examples/peakflops.jl

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
11
using CUDAdrv, CUDAnative
2-
include(joinpath(@__DIR__, "..", "test", "array.jl")) # real applications: use CuArrays.jl
2+
3+
include(joinpath(@__DIR__, "..", "test", "array.jl"))
4+
const CuArray = CuTestArray # real applications: use CuArrays.jl
35

46
using Test
57

@@ -30,10 +32,10 @@ function peakflops(n::Integer=5000, dev::CuDevice=CuDevice(0))
3032
c = round.(rand(Float32, dims) * 100)
3133
out = similar(a)
3234

33-
d_a = CuTestArray(a)
34-
d_b = CuTestArray(b)
35-
d_c = CuTestArray(c)
36-
d_out = CuTestArray(out)
35+
d_a = CuArray(a)
36+
d_b = CuArray(b)
37+
d_c = CuArray(c)
38+
d_out = CuArray(out)
3739

3840
len = prod(dims)
3941
threads = min(len, 1024)

examples/reduce/benchmark.jl

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -40,8 +40,8 @@ benchmark_gpu = @benchmarkable begin
4040
val = Array(gpu_output)[1]
4141
end setup=(
4242
val = nothing;
43-
gpu_input = CuTestArray($input);
44-
gpu_output = CuTestArray($output)
43+
gpu_input = CuArray($input);
44+
gpu_output = CuArray($output)
4545
) teardown=(
4646
gpu_input = nothing;
4747
gpu_output = nothing

examples/reduce/reduce.jl

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,9 @@
88
# Based on devblogs.nvidia.com/parallelforall/faster-parallel-reductions-kepler/
99

1010
using CUDAdrv, CUDAnative
11-
include(joinpath(@__DIR__, "..", "..", "test", "array.jl")) # real applications: use CuArrays.jl
11+
12+
include(joinpath(@__DIR__, "..", "..", "test", "array.jl"))
13+
const CuArray = CuTestArray # real applications: use CuArrays.jl
1214

1315

1416
#
@@ -84,7 +86,7 @@ Reduce a large array.
8486
8587
Kepler-specific implementation, ie. you need sm_30 or higher to run this code.
8688
"""
87-
function gpu_reduce(op::Function, input::CuTestArray{T}, output::CuTestArray{T}) where {T}
89+
function gpu_reduce(op::Function, input::CuArray{T}, output::CuArray{T}) where {T}
8890
len = length(input)
8991

9092
function get_config(kernel)

examples/reduce/verify.jl

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -16,8 +16,8 @@ cpu_val = reduce(+, input)
1616

1717
# CUDAnative
1818
let
19-
gpu_input = CuTestArray(input)
20-
gpu_output = CuTestArray(output)
19+
gpu_input = CuArray(input)
20+
gpu_output = CuArray(output)
2121
gpu_reduce(+, gpu_input, gpu_output)
2222
gpu_val = Array(gpu_output)[1]
2323
@assert cpu_val == gpu_val

examples/scan.jl

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,9 @@
44
# Based on https://developer.nvidia.com/gpugems/GPUGems3/gpugems3_ch39.html
55

66
using CUDAdrv, CUDAnative
7-
include(joinpath(@__DIR__, "..", "test", "array.jl")) # real applications: use CuArrays.jl
7+
8+
include(joinpath(@__DIR__, "..", "test", "array.jl"))
9+
const CuArray = CuTestArray # real applications: use CuArrays.jl
810

911
function cpu_accumulate!(op::Function, data::Matrix{T}) where {T}
1012
cols = size(data,2)
@@ -65,7 +67,7 @@ a = rand(Int, rows, cols)
6567
cpu_a = copy(a)
6668
cpu_accumulate!(+, cpu_a)
6769

68-
gpu_a = CuTestArray(a)
70+
gpu_a = CuArray(a)
6971
@cuda blocks=cols threads=rows shmem=2*rows*sizeof(eltype(a)) gpu_accumulate!(+, gpu_a)
7072

7173
using Test

0 commit comments

Comments (0)