
Commit 764f98b

remove trailing whitespaces

Author: Paul F Baumeister
1 parent: f4a368d

10 files changed: +60 -60 lines changed. With one exception (a comment update in bench_tfqmrgpu.cu, line 19), every removed/added line pair below differs only in trailing whitespace, so both sides of a pair render identically.

tfQMRgpu/include/tfqmrgpu_Fortran.h

Lines changed: 1 addition & 1 deletion

@@ -20,7 +20,7 @@
     integer(kind=4), parameter :: TFQMRGPU_LAYOUT_RIRIRIRI = 85 !! default host layout, real and imag parts are interleaved.
    integer(kind=4), parameter :: TFQMRGPU_LAYOUT_DEFAULT = 85 !! default Fortran data layout for complex and double complex

-    ! !! pointer types require 64bit
+    ! !! pointer types require 64bit
     integer, parameter :: TFQMRGPU_HANDLE_KIND = 8 !! a pointer to an opaque handle
     integer, parameter :: TFQMRGPU_PLAN_KIND = 8 !! a pointer to an opaque plan object
     integer, parameter :: TFQMRGPU_PTR_KIND = 8 !! a pointer to data
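The comment "pointer types require 64bit" is the key constraint in this hunk: on the C/C++ side these three KINDs carry raw addresses, so the Fortran integers must be 8 bytes wide. A minimal C++ sketch of the round trip; the opaque struct name is illustrative, not the library's:

    #include <cstdint>

    struct tfqmrgpuHandle; // hypothetical stand-in for the library's opaque handle type

    static_assert(sizeof(void*) == sizeof(int64_t), "pointer types require 64bit");

    // hand a handle to Fortran as integer(kind=TFQMRGPU_HANDLE_KIND), i.e. an int64_t
    int64_t to_fortran(tfqmrgpuHandle *h) { return reinterpret_cast<int64_t>(h); }

    // recover the pointer from the Fortran-side integer
    tfqmrgpuHandle* from_fortran(int64_t i) { return reinterpret_cast<tfqmrgpuHandle*>(i); }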

tfQMRgpu/include/tfqmrgpu_Fortran_module.F90

Lines changed: 6 additions & 6 deletions

@@ -54,7 +54,7 @@ module tfqmrgpu
     module procedure bsrsv_solve, &
                      tfqmrgpu_bsrsv_complete
   endinterface
-
+
   contains

   subroutine print_error(status, ierr)
@@ -93,10 +93,10 @@ subroutine getStream(handle, streamId, ierr)
     external :: tfqmrgpugetstream
     call tfqmrgpugetstream(handle, streamId, ierr)
   endsubroutine ! get
-
+

#define DevPtrType integer(kind=8)
-
+
   subroutine createWorkspace(pBuffer, pBufferSizeInBytes, ierr)
     integer(kind=4), intent(out) :: ierr ! this is the return value in the C-API
     DevPtrType, intent(inout) :: pBuffer
@@ -154,7 +154,7 @@ subroutine bsrsv_destroyPlan(handle, plan, ierr)
     external :: tfqmrgpu_bsrsv_destroyplan
     call tfqmrgpu_bsrsv_destroyplan(handle, plan, ierr)
   endsubroutine ! destroy
-
+
   subroutine bsrsv_bufferSize(handle, plan, &
                               ldA, blockDim, ldB, RhsBlockDim, &
                               doublePrecision, pBufferSizeInBytes, ierr)
@@ -202,7 +202,7 @@ subroutine bsrsv_getBuffer(handle, plan, pBuffer, ierr)
#ifdef DEBUG
     write(*, '(a,":",i0,a,z0)') __FILE__, &
         __LINE__," got pBuffer = 0x",pBuffer
-#endif
+#endif
   endsubroutine ! get

   subroutine bsrsv_setMatrix_c(handle, plan, var, val, ld, trans, layout, ierr)
@@ -232,7 +232,7 @@ subroutine bsrsv_setMatrix_z(handle, plan, var, val, ld, trans, layout, ierr)
     external :: tfqmrgpu_bsrsv_setmatrix_z
     call tfqmrgpu_bsrsv_setmatrix_z(handle, plan, var, val, ld, trans, layout, ierr)
   endsubroutine ! set
-
+
   subroutine bsrsv_getMatrix_c(handle, plan, var, val, ld, trans, layout, ierr)
     !! retrieves the GPU memory buffer registered in plan.
     integer(kind=4), intent(out) :: ierr ! this is the return value in the C-API
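The wrappers above expose the usual two-phase workspace idiom: bsrsv_bufferSize() queries how many bytes a plan needs, and createWorkspace() supplies device memory of that size, which is then registered in the plan. A hedged host-side sketch of the same flow in C++, assuming the size was already obtained from the bufferSize query (the real registration call, tfqmrgpu_bsrsv_setBuffer, appears in the benchmark below):

    #include <cuda_runtime.h> // cudaMalloc
    #include <cstddef>        // size_t

    // sketch of the query-then-allocate idiom behind createWorkspace()
    char* allocate_workspace(size_t const pBufferSizeInBytes) {
        // in the real flow, pBufferSizeInBytes comes from bsrsv_bufferSize()
        char *pBuffer = nullptr;
        cudaMalloc((void**)&pBuffer, pBufferSizeInBytes); // device memory buffer
        return pBuffer; // to be registered via tfqmrgpu_bsrsv_setBuffer()
    }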

tfQMRgpu/include/tfqmrgpu_example_reader.hxx

Lines changed: 5 additions & 5 deletions

@@ -170,16 +170,16 @@ namespace tfqmrgpu_example_reader {
     std::cout << "# non-zeros " << avg_nzpr << " +/- " << dev_nzpr << " in " << nzpr0 << " of " << op->nRows << " rows" << std::endl;
     double dev_nzpc; double const avg_nzpc = average_and_deviation(nzpc0, nzpc1, nzpc2, &dev_nzpc);
     std::cout << "# non-zeros " << avg_nzpc << " +/- " << dev_nzpc << " in " << nzpc0 << " of " << op->nCols << " columns" << std::endl;
-
+
     std::cout << std::endl;
 } // op

 auto const A = &(ABX[0]), B = &(ABX[1]), X = &(ABX[2]);
-
+
 assert(B->nCols == nCols); // number of right hand sides
 assert(X->nCols == nCols); // number of right hand sides, redundant info, sorry
 assert(X->nRows == A->nCols); // multiplication of A*X must be well-defined
-
+
 assert(A->nRows == A->nCols); // A is assumed to be a square operator here
 assert(A->fastBlockDim == A->slowBlockDim); // A is assumed to be a square operator here

@@ -208,9 +208,9 @@ namespace tfqmrgpu_example_reader {
         }
         std::cout << std::endl;
     } // 0
-
+
 } // elongate the B operator
-
+
 return tolerance;
 } // read_in
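The "avg +/- dev" statistics printed above come from average_and_deviation(), which is called with what look like the zeroth, first, and second moments of the per-row and per-column nonzero counts (nzpr0/nzpr1/nzpr2, nzpc0/nzpc1/nzpc2). A plausible reconstruction under exactly that moment convention; this is a sketch, not the repository's implementation:

    #include <algorithm> // std::max
    #include <cmath>     // std::sqrt

    // n: number of samples, s1: sum of values, s2: sum of squared values
    double average_and_deviation(double const n, double const s1, double const s2, double *dev) {
        double const avg = s1/n;                           // sample mean
        *dev = std::sqrt(std::max(0.0, s2/n - avg*avg));   // population standard deviation
        return avg;
    }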

tfQMRgpu/include/tfqmrgpu_example_xml_reader.hxx

Lines changed: 4 additions & 4 deletions

@@ -60,7 +60,7 @@ namespace tfqmrgpu_example_xml_reader {
       rapidxml::xml_node<> const *node
     , char const *const child_name
     , int const echo=0
-) {
+) {
     if (nullptr != node) {
         for (auto child = node->first_node(); child; child = child->next_sibling()) {
             if (0 == std::strcmp(child_name, child->name())) {
@@ -117,7 +117,7 @@ namespace tfqmrgpu_example_xml_reader {

     // create the root node
     rapidxml::xml_document<> doc;
-
+
     if (echo > 0) std::printf("# parse file content using rapidxml\n");
     doc.parse<0>(infile.data());

@@ -172,7 +172,7 @@ namespace tfqmrgpu_example_xml_reader {
     std::printf("\n# Warning! Cannot find CompressedSparseRow in SparseMatrix\n\n");
     return 0;
 } // no csr found
-
+
 auto const nzpr = find_child(csr, "NonzerosPerRow", echo);
 if (nzpr) {
     int const nrows = std::atoi(find_attribute(nzpr, "rows", "0", echo));
@@ -247,7 +247,7 @@ namespace tfqmrgpu_example_xml_reader {
 } else { // SparseMatrix
     std::printf("\n# Warning! Cannot find a SparseMatrix for operator %s\n\n", id);
 } // SparseMatrix
-
+
 auto const DataTensor = find_child(BSM, "DataTensor", echo);
 if (DataTensor) {
     scale_values[abx] = std::atof(find_attribute(DataTensor, "scale", "1", echo));
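The hunk at line 60 shows the head and loop of find_child(); completed from that fragment, the rapidxml traversal pattern reads as follows. Only the return paths are filled in here, under the assumption that the first matching child wins (find_attribute presumably does the analogous scan over first_attribute()/next_attribute() with a string default):

    #include "rapidxml.hpp" // rapidxml::xml_node
    #include <cstring>      // std::strcmp

    rapidxml::xml_node<> const* find_child(
          rapidxml::xml_node<> const *node
        , char const *const child_name
        , int const echo=0 // verbosity; used for printing in the full version
    ) {
        if (nullptr != node) {
            for (auto child = node->first_node(); child; child = child->next_sibling()) {
                if (0 == std::strcmp(child_name, child->name())) return child; // first match wins
            }
        }
        return nullptr; // not found, or node itself was null
    }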

tfQMRgpu/include/tfqmrgpu_linalg.hxx

Lines changed: 21 additions & 21 deletions

@@ -22,9 +22,9 @@
 #endif // DEBUG

 namespace tfqmrgpu {
-
+
     // tfQMR decision sections ////////////////////////////////////////////////////////////////////////
-
+
 #define EPSILON 2.5e-308


@@ -36,7 +36,7 @@ namespace tfqmrgpu {
     , double const (*devPtr z35)[2][LM] // inner product v3.v5
     , uint32_t const nCols
 ) {
-#ifndef HAS_NO_CUDA
+#ifndef HAS_NO_CUDA
     check_launch_params( { nCols, 1, 1 }, { LM, 1, 1 } );
     { int const i = blockIdx.x;
     { int const j = threadIdx.x;
@@ -81,7 +81,7 @@ namespace tfqmrgpu {
     , double const (*devPtr var)[LM] // var
     , uint32_t const nCols
 ) {
-#ifndef HAS_NO_CUDA
+#ifndef HAS_NO_CUDA
     check_launch_params( { nCols, 1, 1 }, { LM, 1, 1 } );
     { int const i = blockIdx.x;
     { int const j = threadIdx.x;
@@ -122,8 +122,8 @@ namespace tfqmrgpu {
     } // threads j
     } // blocks i
 } // dec34
-
-
+
+
 template <typename real_t, int LM>
 void __global__ tfQMRdecT( // GPU kernel, must be launched with <<< nCols, LM >>>
       int8_t (*devPtr status)[LM] // tfQMR status
@@ -177,11 +177,11 @@ namespace tfqmrgpu {
     } // blocks i
 } // decT

-
-
+
+
     // basis linear algebra kernels ////////////////////////////////////////////////////////////////////////

-
+
 template <typename real_in_t, typename real_out_t>
 void __global__ convert_precision( // GPU kernel, must be launched with <<< { any, 1, 1 }, { any, 1, 1 } >>>
       real_out_t (*devPtr out) // result, out
@@ -335,7 +335,7 @@ namespace tfqmrgpu {
     }
 } // transpose_blocks

-
+
 #ifndef HAS_NO_CUDA
 template <typename real_t, int LM>
 void __global__ add_RHS_kernel( // GPU kernel, must be launched with <<< { any, 1, 1 }, { LM, 1, 1 } >>>
@@ -384,8 +384,8 @@ namespace tfqmrgpu {
 #endif // HAS_CUDA
 } // add_RHS

-
-
+
+
 #ifndef HAS_NO_CUDA
 template <typename real_t, int LM>
 void __global__ set_unit_blocks_kernel( // GPU kernel, must be launched with <<< { nnzb, 1, 1 }, { LM, 1, 1 } >>>
@@ -424,8 +424,8 @@ namespace tfqmrgpu {
     } // inzb
 #endif // HAS_CUDA
 } // set_unit_blocks
-
-
+
+

     // linear algebra functions ////////////////////////////////////////////////////////////////////////////////////////

@@ -470,7 +470,7 @@ namespace tfqmrgpu {
     if (2 == D2) {
         dots[iput*nCols + icol][1][j] = di; // no race condition here
     } // D2
-
+
     } // inz

 } // col_inner
@@ -648,8 +648,8 @@ namespace tfqmrgpu {


     // basis linear algebra level 3 kernels ////////////////////////////////////////////////////////////////////////
-
-#ifndef HAS_NO_CUDA
+
+#ifndef HAS_NO_CUDA
 template <typename real_t, int LM>
 void __global__ set_complex_value_kernel(
       real_t (*devPtr array)[2][LM] // 1D launch with correct size
@@ -682,8 +682,8 @@ namespace tfqmrgpu {
 #endif // HAS_CUDA
 } // set_complex_value

-
-#ifndef HAS_NO_CUDA
+
+#ifndef HAS_NO_CUDA
 template <typename real_t, int LM>
 void __global__ set_real_value_kernel(
       real_t (*devPtr array)[LM] // 1D launch with correct size
@@ -712,8 +712,8 @@ namespace tfqmrgpu {
     } // iblock
 #endif // HAS_CUDA
 } // set_real_value
-
-
+
+
 inline tfqmrgpuStatus_t create_random_numbers(
       float (*devPtr v3)
     , size_t const length // number of floats in v3
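Several kernels in this file guard their launch geometry with check_launch_params( { nCols, 1, 1 }, { LM, 1, 1 } ) before mapping blockIdx.x to the column index i and threadIdx.x to the slot j. A hedged sketch of what such a guard presumably asserts; the repository's actual implementation may differ (compile as CUDA, since it uses the built-in gridDim/blockDim):

    #include <cassert>

    // verify the kernel was launched with exactly the expected geometry,
    // so blockIdx.x and threadIdx.x can be used as direct indices
    __device__ inline void check_launch_params(dim3 const grid, dim3 const block) {
        assert(grid.x  == gridDim.x  && grid.y  == gridDim.y  && grid.z  == gridDim.z);
        assert(block.x == blockDim.x && block.y == blockDim.y && block.z == blockDim.z);
    }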

tfQMRgpu/include/tfqmrgpu_plan.hxx

Lines changed: 1 addition & 1 deletion

@@ -5,7 +5,7 @@
 #include "tfqmrgpu_memWindow.h" // memWindow_t

 struct bsrsv_plan_t {
-
+
     char* pBuffer; // device memory buffer

     uint32_t nRows; // number of block rows

tfQMRgpu/include/tfqmrgpu_util.hxx

Lines changed: 1 addition & 1 deletion

@@ -119,6 +119,6 @@
         } // i
     } // master
 } // print_array
-
+
 // absolute square of a complex number computed in double
 inline __host__ __device__ double abs2(double const zRe, double const zIm) { return zRe*zRe + zIm*zIm; }
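abs2() works on both host and device and avoids forming a std::complex temporary or taking a square root. A quick usage sketch, assuming tfqmrgpu_util.hxx is included:

    #include <cassert>
    // #include "tfqmrgpu_util.hxx" // provides abs2()

    void abs2_example() {
        double const zRe = 3.0, zIm = 4.0;
        assert(25.0 == abs2(zRe, zIm)); // |3 + 4i|^2 = 3*3 + 4*4 = 25
    }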

tfQMRgpu/source/bench_tfqmrgpu.cu

Lines changed: 5 additions & 5 deletions

@@ -16,7 +16,7 @@

 #include "tfqmrgpu_util.hxx" // FlopChar, CCheck, copy_data_to_gpu, get_data_from_gpu
 #ifndef HAS_NO_CUDA
-#include "tfqmrgpu_blockmult.hxx" // gemmNxNf, gemmNxNf1
+#include "tfqmrgpu_blockmult.hxx" // gemmNxNf
 #endif // HAS_CUDA

 #ifdef DEBUG
@@ -69,7 +69,7 @@ namespace GPUbench {
     // step 3: register the CUDA stream in the handle
     callAndCheck( tfqmrgpuSetStream(handle, streamId) )

-    if (1) { // sanity check
+    if (1) { // sanity check
         auto streamId_copy{streamId};
         callAndCheck( tfqmrgpuGetStream(handle, &streamId_copy) )
         assert(streamId == streamId_copy);
@@ -113,7 +113,7 @@ namespace GPUbench {
     // step 7: register the GPU memory buffer in the bsrsv-plan
     callAndCheck( tfqmrgpu_bsrsv_setBuffer(handle, plan, pBuffer) )

-    if (1) { // sanity check
+    if (1) { // sanity check
         auto pBuffer_copy{pBuffer};
         callAndCheck( tfqmrgpu_bsrsv_getBuffer(handle, plan, &pBuffer_copy) )
         assert(pBuffer == pBuffer_copy);
@@ -143,7 +143,7 @@ namespace GPUbench {
     // compare matX and matR (the reference matrix)
     auto const sizeX = X->mat.size();
     std::vector<double> Xref(X->mat); // copy constructor
-
+
     // step d: retrieve the result vectors X
     // convert the blocks into ColMajor and RIRIRIRI to match the Fortran data layout
     callAndCheck( tfqmrgpu_bsrsv_getMatrix(handle, plan, 'X',
@@ -539,6 +539,6 @@ int main(int const argc, char const *const argv[]) {
     std::printf("# found tolerance %g\n", tolerance);
     std::printf("# Execute %d repetitions with max. %d iterations.\n", nrep, MaxIter);
     std::printf("# requested precision = %c for LM = %d\n", flouble, ABX[0].fastBlockDim);
-
+
     return GPUbench::benchmark_tfQMRgpu_library(ABX, tolerance, MaxIter, nrep, flouble);
 } // main
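Every library call in the benchmark is wrapped in callAndCheck( ... ), used without a trailing semicolon, which suggests a block-bodied macro. Its definition is not part of this diff; the sketch below is a guess at the pattern, and both the macro body and the assumption that 0 encodes success are hypothetical:

    #include <cstdio>  // std::printf
    #include <cstdlib> // std::exit

    // hypothetical reconstruction of the benchmark's error-checking wrapper
    #define callAndCheck(CALL) { \
        auto const _stat = (CALL); \
        if (0 != _stat) { /* assumes 0 encodes success */ \
            std::printf("# %s:%d %s failed with status %d\n", \
                        __FILE__, __LINE__, #CALL, int(_stat)); \
            std::exit(int(_stat)); \
        } }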
