@@ -563,6 +563,61 @@ namespace yask {
563
563
// Index for distinguishing my var from neighbors' vars.
564
564
// Unscoped enum, so the enumerators convert implicitly to integers and
// can be used directly as array indices: self == 0 (this var),
// others == 1 (neighbors' vars).
enum dirty_idx { self, others };
565
565
566
+ // Reduction result.
567
+ class red_res : public yk_var ::yk_reduction_result {
568
+
569
+ protected:
570
+ char _pad[CACHELINE_BYTES]; // prevent false sharing.
571
+
572
+ public:
573
+ int _mask = 0 ;
574
+ idx_t _nred = 0 ;
575
+ double _sum = 0.0 ;
576
+ double _sum_sq = 0.0 ;
577
+ double _prod = 1.0 ;
578
+ double _max = DBL_MIN;
579
+ double _min = DBL_MAX;
580
+
581
+ virtual ~red_res () { }
582
+
583
+ // / Get the allowed reductions.
584
+ int get_reduction_mask () const {
585
+ return _mask;
586
+ }
587
+
588
+ // / Get the number of elements reduced.
589
+ idx_t get_num_elements_reduced () const {
590
+ return _nred;
591
+ }
592
+
593
+ // / Get results
594
+ double get_sum () const {
595
+ if (_mask & yk_var::yk_sum_reduction)
596
+ return _sum;
597
+ THROW_YASK_EXCEPTION (" Sum reduction result was not requested in reduction_mask" );
598
+ }
599
+ double get_sum_squares () const {
600
+ if (_mask & yk_var::yk_sum_squares_reduction)
601
+ return _sum;
602
+ THROW_YASK_EXCEPTION (" Sum-of-squares reduction result was not requested in reduction_mask" );
603
+ }
604
+ double get_product () const {
605
+ if (_mask & yk_var::yk_product_reduction)
606
+ return _prod;
607
+ THROW_YASK_EXCEPTION (" Product reduction result was not requested in reduction_mask" );
608
+ }
609
+ double get_max () const {
610
+ if (_mask & yk_var::yk_max_reduction)
611
+ return _max;
612
+ THROW_YASK_EXCEPTION (" Max reduction result was not requested in reduction_mask" );
613
+ }
614
+ double get_min () const {
615
+ if (_mask & yk_var::yk_sum_reduction)
616
+ return _min;
617
+ THROW_YASK_EXCEPTION (" Min reduction result was not requested in reduction_mask" );
618
+ }
619
+ };
620
+
566
621
protected:
567
622
568
623
// Ptr to the core data.
@@ -610,7 +665,7 @@ namespace yask {
610
665
611
666
// Coherency of device data.
612
667
Coherency _coh;
613
-
668
+
614
669
// Convenience function to format indices like
615
670
// "x=5, y=3".
616
671
virtual std::string make_index_string (const Indices& idxs,
@@ -953,61 +1008,6 @@ namespace yask {
953
1008
#endif
954
1009
}
955
1010
956
- // Reduction result.
957
- class red_res : public yk_var ::yk_reduction_result {
958
-
959
- protected:
960
- char _pad[CACHELINE_BYTES]; // prevent false sharing.
961
-
962
- public:
963
- int _mask = 0 ;
964
- idx_t _nred = 0 ;
965
- double _sum = 0.0 ;
966
- double _sum_sq = 0.0 ;
967
- double _prod = 1.0 ;
968
- double _max = DBL_MIN;
969
- double _min = DBL_MAX;
970
-
971
- virtual ~red_res () { }
972
-
973
- // / Get the allowed reductions.
974
- int get_reduction_mask () const {
975
- return _mask;
976
- }
977
-
978
- // / Get the number of elements reduced.
979
- idx_t get_num_elements_reduced () const {
980
- return _nred;
981
- }
982
-
983
- // / Get results
984
- double get_sum () const {
985
- if (_mask & yk_var::yk_sum_reduction)
986
- return _sum;
987
- THROW_YASK_EXCEPTION (" Sum reduction result was not requested in reduction_mask" );
988
- }
989
- double get_sum_squares () const {
990
- if (_mask & yk_var::yk_sum_squares_reduction)
991
- return _sum;
992
- THROW_YASK_EXCEPTION (" Sum-of-squares reduction result was not requested in reduction_mask" );
993
- }
994
- double get_product () const {
995
- if (_mask & yk_var::yk_product_reduction)
996
- return _prod;
997
- THROW_YASK_EXCEPTION (" Product reduction result was not requested in reduction_mask" );
998
- }
999
- double get_max () const {
1000
- if (_mask & yk_var::yk_max_reduction)
1001
- return _max;
1002
- THROW_YASK_EXCEPTION (" Max reduction result was not requested in reduction_mask" );
1003
- }
1004
- double get_min () const {
1005
- if (_mask & yk_var::yk_sum_reduction)
1006
- return _min;
1007
- THROW_YASK_EXCEPTION (" Min reduction result was not requested in reduction_mask" );
1008
- }
1009
- };
1010
-
1011
1011
// Reductions.
1012
1012
virtual yk_var::yk_reduction_result_ptr
1013
1013
reduce_elements_in_slice (int reduction_mask,
@@ -1414,15 +1414,16 @@ namespace yask {
1414
1414
// Make array of results, one for each thread,
1415
1415
// so we don't have to use atomics or critical sections.
1416
1416
int nthr = yask_get_num_threads ();
1417
- red_res resa[nthr];
1417
+ std::vector<red_res> rrv;
1418
+ rrv.resize (nthr);
1418
1419
for (int i = 0 ; i < nthr; i++)
1419
- resa [i]._mask = reduction_mask;
1420
+ rrv [i]._mask = reduction_mask;
1420
1421
1421
1422
// Call the generic visit.
1422
1423
// TODO: clean up ptr cast.
1423
1424
auto n = dynamic_cast <VarT*>(const_cast <YkVarBase*>(this ))->template
1424
1425
_visit_elements_in_slice<RedElem, T, VarT>(strict_indices,
1425
- (T*)resa , IDX_MAX, 0 ,
1426
+ (T*)rrv. data () , IDX_MAX, 0 ,
1426
1427
first_indices, last_indices,
1427
1428
on_device);
1428
1429
@@ -1434,11 +1435,12 @@ namespace yask {
1434
1435
// Join per-thread results.
1435
1436
for (int i = 0 ; i < nthr; i++) {
1436
1437
auto * p = resp.get ();
1437
- p->_sum += resa[i]._sum ;
1438
- p->_sum_sq += resa[i]._sum ;
1439
- p->_prod *= resa[i]._prod ;
1440
- p->_max = std::max (p->_max , resa[i]._max );
1441
- p->_min = std::min (p->_min , resa[i]._min );
1438
+ auto * resp = &rrv.at (i);
1439
+ p->_sum += resp->_sum ;
1440
+ p->_sum_sq += resp->_sum ;
1441
+ p->_prod *= resp->_prod ;
1442
+ p->_max = std::max (p->_max , resp->_max );
1443
+ p->_min = std::min (p->_min , resp->_min );
1442
1444
}
1443
1445
1444
1446
return resp;
0 commit comments