18
18
#include < thrust/device_vector.h>
19
19
#include < thrust/host_vector.h>
20
20
21
- // for now, once finufft is demacroized we can test float
22
- using T = double ;
23
-
24
21
template <typename T, typename V> bool equal (V *d_vec, T *cpu, const std::size_t size) {
25
22
// copy d_vec to cpu
26
23
thrust::host_vector<T> h_vec (size);
@@ -75,10 +72,10 @@ auto almost_equal(V *d_vec,
75
72
assert (cudaMemcpy (h_vec.data (), d_vec, size * sizeof (T), cudaMemcpyDeviceToHost) ==
76
73
cudaSuccess);
77
74
// print h_vec and cpu
78
- // for (std::size_t i = 0; i < size; ++i) {
79
- // std::cout << "gpu[" << i << "]: " << h_vec[i] << " cpu[" << i << "]: " << cpu[i]
80
- // << '\n';
81
- // }
75
+ for (std::size_t i = 0 ; i < size; ++i) {
76
+ std::cout << " gpu[" << i << " ]: " << h_vec[i] << " cpu[" << i << " ]: " << cpu[i]
77
+ << ' \n ' ;
78
+ }
82
79
std::cout << " relerrtwonorm: " << infnorm (h_vec.data (), cpu, size) << std::endl;
83
80
// compare the l2 norm of the difference between the two vectors
84
81
if (relerrtwonorm (h_vec.data (), cpu, size) < tol) {
@@ -88,32 +85,39 @@ auto almost_equal(V *d_vec,
88
85
}
89
86
90
87
int main () {
88
+ // for now, once finufft is demacroized we can test float
89
+ using test_t = double ;
90
+
91
91
// defaults. tests should shadow them to override
92
92
cufinufft_opts opts;
93
93
cufinufft_default_opts (&opts);
94
- opts.debug = 2 ;
94
+ opts.debug = 2 ;
95
+ opts.upsampfac = 1.25 ;
96
+ opts.gpu_kerevalmeth = 1 ;
95
97
// opts.gpu_sort = 0;
96
98
finufft_opts fin_opts;
97
99
finufft_default_opts (&fin_opts);
98
100
fin_opts.debug = 2 ;
99
101
fin_opts.spread_kerevalmeth = 1 ;
102
+ fin_opts.upsampfac = 1.25 ;
100
103
const int iflag = 1 ;
101
104
const int ntransf = 1 ;
102
105
const int dim = 3 ;
103
106
const double tol = 1e-9 ;
104
- const int N = 1023 ;
107
+ const int n_modes[] = {10 , 5 , 3 };
108
+ const int N = n_modes[0 ] * n_modes[1 ] * n_modes[2 ];
105
109
const int M = 1000 ;
106
110
const double bandwidth = 50.0 ;
107
111
108
- thrust::host_vector<T > x (M * ntransf), y (M * ntransf), z (M * ntransf), s (N * ntransf),
109
- t (N * ntransf), u (N * ntransf);
110
- thrust::host_vector<std::complex<T >> c (M * ntransf), fk (N * ntransf);
112
+ thrust::host_vector<test_t > x (M * ntransf), y (M * ntransf), z (M * ntransf),
113
+ s (N * ntransf), t (N * ntransf), u (N * ntransf);
114
+ thrust::host_vector<std::complex<test_t >> c (M * ntransf), fk (N * ntransf);
111
115
112
- thrust::device_vector<T > d_x{}, d_y{}, d_z{}, d_s{}, d_t {}, d_u{};
113
- thrust::device_vector<std::complex<T >> d_c (M * ntransf), d_fk (N * ntransf);
116
+ thrust::device_vector<test_t > d_x{}, d_y{}, d_z{}, d_s{}, d_t {}, d_u{};
117
+ thrust::device_vector<std::complex<test_t >> d_c (M * ntransf), d_fk (N * ntransf);
114
118
115
119
std::default_random_engine eng (42 );
116
- std::uniform_real_distribution<T > dist11 (-1 , 1 );
120
+ std::uniform_real_distribution<test_t > dist11 (-1 , 1 );
117
121
auto rand_util_11 = [&eng, &dist11]() {
118
122
return dist11 (eng);
119
123
};
@@ -161,11 +165,12 @@ int main() {
161
165
cudaDeviceSynchronize ();
162
166
163
167
const auto cpu_planer =
164
- [iflag, tol, ntransf, dim, M, N, &x, &y, &z, &s, &t, &u, &fin_opts](
168
+ [iflag, tol, ntransf, dim, M, N, n_modes, &x, &y, &z, &s, &t, &u, &fin_opts](
165
169
const auto type) {
166
170
finufft_plan_s *plan{nullptr };
167
- assert (finufft_makeplan (
168
- type, dim, nullptr , iflag, ntransf, tol, &plan, &fin_opts) == 0 );
171
+ std::int64_t nl[] = {n_modes[0 ], n_modes[1 ], n_modes[2 ]};
172
+ assert (
173
+ finufft_makeplan (type, dim, nl, iflag, ntransf, tol, &plan, &fin_opts) == 0 );
169
174
assert (finufft_setpts (plan, M, x.data (), y.data (), z.data (), N, s.data (),
170
175
t.data (), u.data ()) == 0 );
171
176
return plan;
@@ -204,6 +209,7 @@ int main() {
204
209
deconv_tol,
205
210
M,
206
211
N,
212
+ n_modes,
207
213
&d_x,
208
214
&d_y,
209
215
&d_z,
@@ -219,8 +225,8 @@ int main() {
219
225
using T = typename std::remove_pointer<decltype (plan)>::type::real_t ;
220
226
const int type = 3 ;
221
227
const auto cpu_plan = cpu_planer (type);
222
- assert (cufinufft_makeplan_impl<T>(type, dim, nullptr , iflag, ntransf, T (tol), &plan ,
223
- &opts) == 0 );
228
+ assert (cufinufft_makeplan_impl<T>(type, dim, ( int *)n_modes , iflag, ntransf, T (tol),
229
+ &plan, & opts) == 0 );
224
230
assert (cufinufft_setpts_impl<T>(M, d_x.data ().get (), d_y.data ().get (),
225
231
d_z.data ().get (), N, d_s.data ().get (),
226
232
d_t .data ().get (), d_u.data ().get (), plan) == 0 );
@@ -245,6 +251,11 @@ int main() {
245
251
assert (equal (plan->kz , cpu_plan->Z , M));
246
252
assert (equal (plan->d_s , cpu_plan->Sp , N));
247
253
assert (equal (plan->d_t , cpu_plan->Tp , N));
254
+ assert (plan->spopts .nspread == cpu_plan->spopts .nspread );
255
+ assert (plan->spopts .upsampfac == cpu_plan->spopts .upsampfac );
256
+ assert (plan->spopts .ES_beta == cpu_plan->spopts .ES_beta );
257
+ assert (plan->spopts .ES_halfwidth == cpu_plan->spopts .ES_halfwidth );
258
+ assert (plan->spopts .ES_c == cpu_plan->spopts .ES_c );
248
259
assert (equal (plan->d_u , cpu_plan->Up , N));
249
260
// NOTE:seems with infnorm we are getting at most 11 digits of precision
250
261
std::cout << " prephase :\n " ;
@@ -258,10 +269,10 @@ int main() {
258
269
c[i].imag (randm11 ());
259
270
}
260
271
d_c = c;
261
- for (int i = 0 ; i < N; i++) {
262
- fk[i] = {- 100 , - 100 };
263
- }
264
- d_fk = fk;
272
+ // for (int i = 0; i < N; i++) {
273
+ // fk[i] = {randm11(), randm11() };
274
+ // }
275
+ // d_fk = fk;
265
276
cufinufft_execute_impl (
266
277
(cuda_complex<T> *)d_c.data ().get (), (cuda_complex<T> *)d_fk.data ().get (), plan);
267
278
finufft_execute (cpu_plan, (std::complex<T> *)c.data (), (std::complex<T> *)fk.data ());
@@ -273,7 +284,7 @@ int main() {
273
284
};
274
285
// testing correctness of the plan creation
275
286
// cufinufft_plan_t<float> *single_plan{nullptr};
276
- cufinufft_plan_t <T > *double_plan{nullptr };
287
+ cufinufft_plan_t <test_t > *double_plan{nullptr };
277
288
// test_type1(double_plan);
278
289
// test_type2(double_plan);
279
290
test_type3 (double_plan);
0 commit comments