Skip to content

Commit d1b975a

Browse files
Authored commit: Merge pull request #1055 from PowerGridModel/bugfix/calculation-info-memory-creep
Performance: fix memory creep + false sharing in calculation info
2 parents 07d30c8 + 199bcb6 commit d1b975a

File tree

2 files changed

+64
-49
lines changed

2 files changed

+64
-49
lines changed

power_grid_model_c/power_grid_model/include/power_grid_model/job_dispatch.hpp

Lines changed: 56 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
#include "main_core/calculation_info.hpp"
1010
#include "main_core/update.hpp"
1111

12+
#include <mutex>
1213
#include <thread>
1314

1415
namespace power_grid_model {
@@ -60,27 +61,35 @@ template <class MainModel, class... ComponentType> class JobDispatch {
6061

6162
// error messages
6263
std::vector<std::string> exceptions(n_scenarios, "");
63-
std::vector<CalculationInfo> infos(n_scenarios);
64+
65+
// thread-safe handling of calculation info
66+
std::mutex calculation_info_mutex;
67+
auto const thread_safe_add_calculation_info = [&calculation_info,
68+
&calculation_info_mutex](CalculationInfo const& info) {
69+
std::lock_guard const lock{calculation_info_mutex};
70+
main_core::merge_into(calculation_info, info);
71+
};
6472

6573
// lambda for sub batch calculation
6674
main_core::utils::SequenceIdx<ComponentType...> all_scenarios_sequence;
67-
auto sub_batch = sub_batch_calculation_(model, std::forward<Calculate>(calculation_fn), result_data,
68-
update_data, all_scenarios_sequence, exceptions, infos);
75+
auto sub_batch =
76+
sub_batch_calculation_(model, std::forward<Calculate>(calculation_fn), result_data, update_data,
77+
all_scenarios_sequence, exceptions, thread_safe_add_calculation_info);
6978

7079
job_dispatch(sub_batch, n_scenarios, threading);
7180

7281
handle_batch_exceptions(exceptions);
73-
calculation_info = main_core::merge_calculation_info(infos);
7482

7583
return BatchParameter{};
7684
}
7785

78-
template <typename Calculate>
86+
template <typename Calculate, typename AddCalculationInfo>
7987
requires std::invocable<std::remove_cvref_t<Calculate>, MainModel&, MutableDataset const&, Idx>
8088
static auto sub_batch_calculation_(MainModel const& base_model, Calculate&& calculation_fn,
8189
MutableDataset const& result_data, ConstDataset const& update_data,
8290
main_core::utils::SequenceIdx<ComponentType...>& all_scenarios_sequence,
83-
std::vector<std::string>& exceptions, std::vector<CalculationInfo>& infos) {
91+
std::vector<std::string>& exceptions,
92+
AddCalculationInfo&& thread_safe_add_calculation_info) {
8493
// cache component update order where possible.
8594
// the order for a cacheable (independent) component by definition is the same across all scenarios
8695
auto const components_to_update = base_model.get_components_to_update(update_data);
@@ -89,56 +98,55 @@ template <class MainModel, class... ComponentType> class JobDispatch {
8998
all_scenarios_sequence = main_core::update::get_all_sequence_idx_map<ComponentType...>(
9099
base_model.state(), update_data, 0, components_to_update, update_independence, false);
91100

92-
return [&base_model, &exceptions, &infos, calculation_fn_ = std::forward<Calculate>(calculation_fn),
93-
&result_data, &update_data, &all_scenarios_sequence_ = std::as_const(all_scenarios_sequence),
94-
components_to_update, update_independence](Idx start, Idx stride, Idx n_scenarios) {
101+
return [&base_model, &exceptions, &thread_safe_add_calculation_info,
102+
calculation_fn_ = std::forward<Calculate>(calculation_fn), &result_data, &update_data,
103+
&all_scenarios_sequence_ = std::as_const(all_scenarios_sequence), components_to_update,
104+
update_independence](Idx start, Idx stride, Idx n_scenarios) {
95105
assert(n_scenarios <= narrow_cast<Idx>(exceptions.size()));
96-
assert(n_scenarios <= narrow_cast<Idx>(infos.size()));
97106

98-
Timer const t_total(infos[start], 0000, "Total in thread");
107+
CalculationInfo thread_info;
99108

100-
auto const copy_model_functor = [&base_model, &infos](Idx scenario_idx) {
101-
Timer const t_copy_model_functor(infos[scenario_idx], 1100, "Copy model");
109+
Timer t_total(thread_info, 0000, "Total in thread");
110+
111+
auto const copy_model_functor = [&base_model, &thread_info] {
112+
Timer const t_copy_model_functor(thread_info, 1100, "Copy model");
102113
return MainModel{base_model};
103114
};
104-
auto model = copy_model_functor(start);
115+
auto model = copy_model_functor();
105116

106117
auto current_scenario_sequence_cache = main_core::utils::SequenceIdx<ComponentType...>{};
107118
auto [setup, winddown] =
108119
scenario_update_restore(model, update_data, components_to_update, update_independence,
109-
all_scenarios_sequence_, current_scenario_sequence_cache, infos);
120+
all_scenarios_sequence_, current_scenario_sequence_cache, thread_info);
110121

111122
auto calculate_scenario = JobDispatch::call_with<Idx>(
112-
[&model, &calculation_fn_, &result_data, &infos](Idx scenario_idx) {
123+
[&model, &calculation_fn_, &result_data, &thread_info](Idx scenario_idx) {
113124
calculation_fn_(model, result_data, scenario_idx);
114-
infos[scenario_idx].merge(model.calculation_info());
125+
main_core::merge_into(thread_info, model.calculation_info());
115126
},
116-
std::move(setup), std::move(winddown), scenario_exception_handler(model, exceptions, infos),
117-
[&model, &copy_model_functor](Idx scenario_idx) { model = copy_model_functor(scenario_idx); });
127+
std::move(setup), std::move(winddown), scenario_exception_handler(model, exceptions, thread_info),
128+
[&model, &copy_model_functor](Idx /*scenario_idx*/) { model = copy_model_functor(); });
118129

119130
for (Idx scenario_idx = start; scenario_idx < n_scenarios; scenario_idx += stride) {
120-
Timer const t_total_single(infos[scenario_idx], 0100, "Total single calculation in thread");
121-
131+
Timer const t_total_single(thread_info, 0100, "Total single calculation in thread");
122132
calculate_scenario(scenario_idx);
123133
}
134+
135+
t_total.stop();
136+
thread_safe_add_calculation_info(thread_info);
124137
};
125138
}
126139

127-
// run sequential if
128-
// specified threading < 0
129-
// use hardware threads, but it is either unknown (0) or only has one thread (1)
130-
// specified threading = 1
131140
template <typename RunSubBatchFn>
132141
requires std::invocable<std::remove_cvref_t<RunSubBatchFn>, Idx /*start*/, Idx /*stride*/, Idx /*n_scenarios*/>
133142
static void job_dispatch(RunSubBatchFn sub_batch, Idx n_scenarios, Idx threading) {
134143
// run batches sequential or parallel
135-
auto const hardware_thread = static_cast<Idx>(std::thread::hardware_concurrency());
136-
if (threading < 0 || threading == 1 || (threading == 0 && hardware_thread < 2)) {
144+
auto const n_thread = n_threads(n_scenarios, threading);
145+
if (n_thread == 1) {
137146
// run all in sequential
138147
sub_batch(0, 1, n_scenarios);
139148
} else {
140149
// create parallel threads
141-
Idx const n_thread = std::min(threading == 0 ? hardware_thread : threading, n_scenarios);
142150
std::vector<std::thread> threads;
143151
threads.reserve(n_thread);
144152
for (Idx thread_number = 0; thread_number < n_thread; ++thread_number) {
@@ -151,6 +159,18 @@ template <class MainModel, class... ComponentType> class JobDispatch {
151159
}
152160
}
153161

162+
// run sequential if
163+
// specified threading < 0
164+
// use hardware threads, but it is either unknown (0) or only has one thread (1)
165+
// specified threading = 1
166+
static Idx n_threads(Idx n_scenarios, Idx threading) {
167+
auto const hardware_thread = static_cast<Idx>(std::thread::hardware_concurrency());
168+
if (threading < 0 || threading == 1 || (threading == 0 && hardware_thread < 2)) {
169+
return 1; // sequential
170+
}
171+
return std::min(threading == 0 ? hardware_thread : threading, n_scenarios);
172+
}
173+
154174
template <typename... Args, typename RunFn, typename SetupFn, typename WinddownFn, typename HandleExceptionFn,
155175
typename RecoverFromBadFn>
156176
requires std::invocable<std::remove_cvref_t<RunFn>, Args const&...> &&
@@ -184,7 +204,7 @@ template <class MainModel, class... ComponentType> class JobDispatch {
184204
main_core::update::independence::UpdateIndependence<ComponentType...> const& do_update_cache,
185205
main_core::utils::SequenceIdx<ComponentType...> const& all_scenario_sequence,
186206
main_core::utils::SequenceIdx<ComponentType...>& current_scenario_sequence_cache,
187-
std::vector<CalculationInfo>& infos) noexcept {
207+
CalculationInfo& info) noexcept {
188208
main_core::utils::ComponentFlags<ComponentType...> independence_flags{};
189209
std::ranges::transform(do_update_cache, independence_flags.begin(),
190210
[](auto const& comp) { return comp.is_independent(); });
@@ -202,15 +222,15 @@ template <class MainModel, class... ComponentType> class JobDispatch {
202222

203223
return std::make_pair(
204224
[&model, &update_data, scenario_sequence, &current_scenario_sequence_cache, &components_to_store,
205-
do_update_cache_ = std::move(do_update_cache), &infos](Idx scenario_idx) {
206-
Timer const t_update_model(infos[scenario_idx], 1200, "Update model");
225+
do_update_cache_ = std::move(do_update_cache), &info](Idx scenario_idx) {
226+
Timer const t_update_model(info, 1200, "Update model");
207227
current_scenario_sequence_cache = main_core::update::get_all_sequence_idx_map<ComponentType...>(
208228
model.state(), update_data, scenario_idx, components_to_store, do_update_cache_, true);
209229

210230
model.template update_components<cached_update_t>(update_data, scenario_idx, scenario_sequence());
211231
},
212-
[&model, scenario_sequence, &current_scenario_sequence_cache, &infos](Idx scenario_idx) {
213-
Timer const t_update_model(infos[scenario_idx], 1201, "Restore model");
232+
[&model, scenario_sequence, &current_scenario_sequence_cache, &info](Idx /*scenario_idx*/) {
233+
Timer const t_update_model(info, 1201, "Restore model");
214234

215235
model.restore_components(scenario_sequence());
216236
std::ranges::for_each(current_scenario_sequence_cache,
@@ -220,8 +240,8 @@ template <class MainModel, class... ComponentType> class JobDispatch {
220240

221241
// Lippincott pattern
222242
static auto scenario_exception_handler(MainModel& model, std::vector<std::string>& messages,
223-
std::vector<CalculationInfo>& infos) {
224-
return [&model, &messages, &infos](Idx scenario_idx) {
243+
CalculationInfo& info) {
244+
return [&model, &messages, &info](Idx scenario_idx) {
225245
std::exception_ptr const ex_ptr = std::current_exception();
226246
try {
227247
std::rethrow_exception(ex_ptr);
@@ -230,7 +250,7 @@ template <class MainModel, class... ComponentType> class JobDispatch {
230250
} catch (...) {
231251
messages[scenario_idx] = "unknown exception";
232252
}
233-
infos[scenario_idx].merge(model.calculation_info());
253+
info.merge(model.calculation_info());
234254
};
235255
}
236256

power_grid_model_c/power_grid_model/include/power_grid_model/main_core/calculation_info.hpp

Lines changed: 8 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -10,21 +10,16 @@
1010

1111
namespace power_grid_model::main_core {
1212

13-
inline CalculationInfo merge_calculation_info(std::vector<CalculationInfo> const& infos) {
14-
CalculationInfo result;
15-
16-
auto const key = Timer::make_key(2226, "Max number of iterations");
17-
for (auto const& info : infos) {
18-
for (auto const& [k, v] : info) {
19-
if (k == key) {
20-
result[k] = std::max(result[k], v);
21-
} else {
22-
result[k] += v;
23-
}
13+
inline CalculationInfo& merge_into(CalculationInfo& destination, CalculationInfo const& source) {
14+
static auto const key = Timer::make_key(2226, "Max number of iterations");
15+
for (auto const& [k, v] : source) {
16+
if (k == key) {
17+
destination[k] = std::max(destination[k], v);
18+
} else {
19+
destination[k] += v;
2420
}
2521
}
26-
27-
return result;
22+
return destination;
2823
}
2924

3025
} // namespace power_grid_model::main_core

0 commit comments

Comments (0)