|
7 | 7 | #include "DataFormats/Provenance/interface/FileIndex.h"
|
8 | 8 | #include "DataFormats/Provenance/interface/IndexIntoFile.h"
|
9 | 9 |
|
| 10 | +#include "TBasket.h" |
10 | 11 | #include "TBranch.h"
|
11 | 12 | #include "TFile.h"
|
12 | 13 | #include "TIterator.h"
|
@@ -97,6 +98,131 @@ namespace edm {
|
97 | 98 | }
|
98 | 99 | }
|
99 | 100 |
|
| 101 | + namespace { |
| 102 | + class BranchBasketBytes { |
| 103 | + public: |
| 104 | + BranchBasketBytes(TBranch const *branch) |
| 105 | + : basketFirstEntry_(branch->GetBasketEntry()), |
| 106 | + basketBytes_(branch->GetBasketBytes()), |
| 107 | + branchName_(branch->GetName()), |
| 108 | + maxBaskets_(branch->GetMaxBaskets()) {} |
| 109 | + |
| 110 | + bool isAlignedWithClusterBoundaries() const { return isAligned_; } |
| 111 | + |
| 112 | + // Processes "next cluster" for the branch, calculating the |
| 113 | + // number of bytes and baskets in the cluster |
| 114 | + // |
| 115 | + // @param[in] clusterBegin Begin entry number for the cluster |
| 116 | + // @param[in] clusterEnd End entry number (exclusive) for the cluster |
| 117 | + // @param[out] nonAlignedBranches Branch name is added to the set if the basket boundary |
| 118 | + // does not align with cluster boundary |
| 119 | + // |
| 120 | + // @return Tuple of the number of bytes and baskets in the cluster |
| 121 | + std::tuple<Long64_t, unsigned> bytesInNextCluster(Long64_t clusterBegin, |
| 122 | + Long64_t clusterEnd, |
| 123 | + std::set<std::string_view> &nonAlignedBranches) { |
| 124 | + if (basketFirstEntry_[iBasket_] != clusterBegin) { |
| 125 | + std::cout << "Branch " << branchName_ << " iBasket " << iBasket_ << " begin entry " |
| 126 | + << basketFirstEntry_[iBasket_] << " does not align with cluster boundary, expected " << clusterBegin |
| 127 | + << std::endl; |
| 128 | + exit(1); |
| 129 | + } |
| 130 | + |
| 131 | + Long64_t bytes = 0; |
| 132 | + unsigned nbaskets = 0; |
| 133 | + for (; iBasket_ < maxBaskets_ and basketFirstEntry_[iBasket_] < clusterEnd; ++iBasket_) { |
| 134 | + bytes += basketBytes_[iBasket_]; |
| 135 | + ++nbaskets; |
| 136 | + } |
| 137 | + if (basketFirstEntry_[iBasket_] != clusterEnd) { |
| 138 | + nonAlignedBranches.insert(branchName_); |
| 139 | + isAligned_ = false; |
| 140 | + return std::tuple(0, 0); |
| 141 | + } |
| 142 | + return std::tuple(bytes, nbaskets); |
| 143 | + } |
| 144 | + |
| 145 | + private: |
| 146 | + Long64_t const *basketFirstEntry_; |
| 147 | + Int_t const *basketBytes_; |
| 148 | + std::string_view branchName_; |
| 149 | + Int_t maxBaskets_; |
| 150 | + Long64_t iBasket_ = 0; |
| 151 | + bool isAligned_ = true; |
| 152 | + }; |
| 153 | + |
| 154 | + std::vector<BranchBasketBytes> makeBranchBasketBytes(TBranch *branch, bool isEventsTree) { |
| 155 | + std::vector<BranchBasketBytes> ret; |
| 156 | + |
| 157 | + TObjArray *subBranches = branch->GetListOfBranches(); |
| 158 | + if (subBranches and subBranches->GetEntries() > 0) { |
| 159 | + // process sub-branches if there are any |
| 160 | + auto const nbranches = subBranches->GetEntries(); |
| 161 | + for (Long64_t iBranch = 0; iBranch < nbranches; ++iBranch) { |
| 162 | + auto vec = makeBranchBasketBytes(dynamic_cast<TBranch *>(subBranches->At(iBranch)), isEventsTree); |
| 163 | + ret.insert(ret.end(), std::make_move_iterator(vec.begin()), std::make_move_iterator(vec.end())); |
| 164 | + } |
| 165 | + } else { |
| 166 | + ret.emplace_back(branch); |
| 167 | + } |
| 168 | + return ret; |
| 169 | + } |
| 170 | + } // namespace |
| 171 | + |
| 172 | + void clusterPrint(TTree *tr, bool isEventsTree) { |
| 173 | + TTree::TClusterIterator clusterIter = tr->GetClusterIterator(0); |
| 174 | + Long64_t const nentries = tr->GetEntries(); |
| 175 | + |
| 176 | + // Keep the state of each branch basket index so that we don't |
| 177 | + // have to iterate through everything on every cluster |
| 178 | + std::vector<BranchBasketBytes> processors; |
| 179 | + { |
| 180 | + TObjArray *branches = tr->GetListOfBranches(); |
| 181 | + Long64_t const nbranches = branches->GetEntries(); |
| 182 | + for (Long64_t iBranch = 0; iBranch < nbranches; ++iBranch) { |
| 183 | + auto vec = makeBranchBasketBytes(dynamic_cast<TBranch *>(branches->At(iBranch)), isEventsTree); |
| 184 | + processors.insert(processors.end(), std::make_move_iterator(vec.begin()), std::make_move_iterator(vec.end())); |
| 185 | + } |
| 186 | + } |
| 187 | + |
| 188 | + std::cout << "Printing cluster boundaries in terms of tree entries of the tree " << tr->GetName() |
| 189 | + << ". Note that end boundary is exclusive." << std::endl; |
| 190 | + if (isEventsTree) { |
| 191 | + std::cout << "For the Events tree the metadata branches are excluded from this calculation, " |
| 192 | + "because their basket boundaries do not necessarily align with the cluster boundaries." |
| 193 | + << std::endl; |
| 194 | + } |
| 195 | + std::cout << std::setw(15) << "Begin" << std::setw(15) << "End" << std::setw(15) << "Entries" << std::setw(15) |
| 196 | + << "Max baskets" << std::setw(15) << "Bytes" << std::endl; |
| 197 | + // Record branches whose baskets do not align with cluster boundaires |
| 198 | + std::set<std::string_view> nonAlignedBranches; |
| 199 | + Long64_t clusterBegin; |
| 200 | + while ((clusterBegin = clusterIter()) < nentries) { |
| 201 | + Long64_t clusterEnd = clusterIter.GetNextEntry(); |
| 202 | + Long64_t bytes = 0; |
| 203 | + unsigned int maxbaskets = 0; |
| 204 | + for (auto &p : processors) { |
| 205 | + if (p.isAlignedWithClusterBoundaries()) { |
| 206 | + auto const [byt, bas] = p.bytesInNextCluster(clusterBegin, clusterEnd, nonAlignedBranches); |
| 207 | + bytes += byt; |
| 208 | + maxbaskets = std::max(bas, maxbaskets); |
| 209 | + } |
| 210 | + } |
| 211 | + std::cout << std::setw(15) << clusterBegin << std::setw(15) << clusterEnd << std::setw(15) |
| 212 | + << (clusterEnd - clusterBegin) << std::setw(15) << maxbaskets << std::setw(15) << bytes << std::endl; |
| 213 | + } |
| 214 | + |
| 215 | + if (not nonAlignedBranches.empty()) { |
| 216 | + std::cout << "\nThe following branches had baskets whose entry boundaries did not align with the cluster " |
| 217 | + "boundaries. Their baskets are excluded from the cluster size calculation above starting from the " |
| 218 | + "first basket that did not align with a cluster boundary." |
| 219 | + << std::endl; |
| 220 | + for (auto &name : nonAlignedBranches) { |
| 221 | + std::cout << " " << name << std::endl; |
| 222 | + } |
| 223 | + } |
| 224 | + } |
| 225 | + |
100 | 226 | std::string getUuid(TTree *uuidTree) {
|
101 | 227 | FileID fid;
|
102 | 228 | FileID *fidPtr = &fid;
|
|
0 commit comments