Skip to content

Commit b2e64e2

Browse files
committed
Add an option to edmFileUtil to print information on clusters
1 parent 2ddeb78 commit b2e64e2

File tree

3 files changed

+143
-1
lines changed

3 files changed

+143
-1
lines changed

IOPool/Common/bin/CollUtil.cc

Lines changed: 126 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
#include "DataFormats/Provenance/interface/FileIndex.h"
88
#include "DataFormats/Provenance/interface/IndexIntoFile.h"
99

10+
#include "TBasket.h"
1011
#include "TBranch.h"
1112
#include "TFile.h"
1213
#include "TIterator.h"
@@ -97,6 +98,131 @@ namespace edm {
9798
}
9899
}
99100

101+
namespace {
102+
class BranchBasketBytes {
103+
public:
104+
BranchBasketBytes(TBranch const *branch)
105+
: basketFirstEntry_(branch->GetBasketEntry()),
106+
basketBytes_(branch->GetBasketBytes()),
107+
branchName_(branch->GetName()),
108+
maxBaskets_(branch->GetMaxBaskets()) {}
109+
110+
bool isAlignedWithClusterBoundaries() const { return isAligned_; }
111+
112+
// Processes "next cluster" for the branch, calculating the
113+
// number of bytes and baskets in the cluster
114+
//
115+
// @param[in] clusterBegin Begin entry number for the cluster
116+
// @param[in] clusterEnd End entry number (exclusive) for the cluster
117+
// @param[out] nonAlignedBranches Branch name is added to the set if the basket boundary
118+
// does not align with cluster boundary
119+
//
120+
// @return Tuple of the number of bytes and baskets in the cluster
121+
std::tuple<Long64_t, unsigned> bytesInNextCluster(Long64_t clusterBegin,
122+
Long64_t clusterEnd,
123+
std::set<std::string_view> &nonAlignedBranches) {
124+
if (basketFirstEntry_[iBasket_] != clusterBegin) {
125+
std::cout << "Branch " << branchName_ << " iBasket " << iBasket_ << " begin entry "
126+
<< basketFirstEntry_[iBasket_] << " does not align with cluster boundary, expected " << clusterBegin
127+
<< std::endl;
128+
exit(1);
129+
}
130+
131+
Long64_t bytes = 0;
132+
unsigned nbaskets = 0;
133+
for (; iBasket_ < maxBaskets_ and basketFirstEntry_[iBasket_] < clusterEnd; ++iBasket_) {
134+
bytes += basketBytes_[iBasket_];
135+
++nbaskets;
136+
}
137+
if (basketFirstEntry_[iBasket_] != clusterEnd) {
138+
nonAlignedBranches.insert(branchName_);
139+
isAligned_ = false;
140+
return std::tuple(0, 0);
141+
}
142+
return std::tuple(bytes, nbaskets);
143+
}
144+
145+
private:
146+
Long64_t const *basketFirstEntry_;
147+
Int_t const *basketBytes_;
148+
std::string_view branchName_;
149+
Int_t maxBaskets_;
150+
Long64_t iBasket_ = 0;
151+
bool isAligned_ = true;
152+
};
153+
154+
// Builds the BranchBasketBytes objects for a branch.
//
// A branch that has sub-branches contributes the objects of its sub-branches
// (recursively) and none for itself; a branch without sub-branches
// contributes exactly one object.
//
// @param[in] branch        Branch to process. A null pointer (e.g. the result
//                          of a failed dynamic_cast at the call site) yields
//                          an empty vector.
// @param[in] isEventsTree  Forwarded unchanged to the recursive calls; this
//                          function does not otherwise act on it.
//                          NOTE(review): no branch is filtered out here based
//                          on this flag - confirm whether that is intended.
//
// @return BranchBasketBytes objects in the order the branches are visited
std::vector<BranchBasketBytes> makeBranchBasketBytes(TBranch *branch, bool isEventsTree) {
  std::vector<BranchBasketBytes> ret;

  // Callers pass the result of dynamic_cast<TBranch *>(), which is null when
  // the array element is not a TBranch; dereferencing it would crash.
  if (branch == nullptr) {
    return ret;
  }

  TObjArray *subBranches = branch->GetListOfBranches();
  auto const nbranches = subBranches ? subBranches->GetEntries() : 0;
  if (nbranches > 0) {
    // process sub-branches if there are any
    for (Int_t iBranch = 0; iBranch < nbranches; ++iBranch) {
      auto vec = makeBranchBasketBytes(dynamic_cast<TBranch *>(subBranches->At(iBranch)), isEventsTree);
      ret.insert(ret.end(), std::make_move_iterator(vec.begin()), std::make_move_iterator(vec.end()));
    }
  } else {
    ret.emplace_back(branch);
  }
  return ret;
}
170+
} // namespace
171+
172+
// Prints one table row per cluster of the tree: begin entry, end entry
// (exclusive), number of entries, the largest per-branch basket count and the
// summed basket bytes. Afterwards lists the branches whose basket boundaries
// did not coincide with a cluster end.
//
// @param[in] tr            Tree whose clusters are printed
// @param[in] isEventsTree  Selects the extra explanatory note in the header
//                          and is passed on to makeBranchBasketBytes()
void clusterPrint(TTree *tr, bool isEventsTree) {
  TTree::TClusterIterator clusterIter = tr->GetClusterIterator(0);
  Long64_t const nentries = tr->GetEntries();

  // One stateful cursor per branch, so that each cluster continues from the
  // basket where the previous cluster stopped instead of rescanning.
  std::vector<BranchBasketBytes> branchStates;
  TObjArray *topBranches = tr->GetListOfBranches();
  Long64_t const nTopBranches = topBranches->GetEntries();
  for (Long64_t i = 0; i != nTopBranches; ++i) {
    auto *topBranch = dynamic_cast<TBranch *>(topBranches->At(i));
    auto leaves = makeBranchBasketBytes(topBranch, isEventsTree);
    branchStates.insert(branchStates.end(), leaves.begin(), leaves.end());
  }

  // Header
  std::cout << "Printing cluster boundaries in terms of tree entries of the tree " << tr->GetName()
            << ". Note that end boundary is exclusive." << std::endl;
  if (isEventsTree) {
    std::cout << "For the Events tree the metadata branches are excluded from this calculation, "
                 "because their basket boundaries do not necessarily align with the cluster boundaries."
              << std::endl;
  }
  std::cout << std::setw(15) << "Begin" << std::setw(15) << "End" << std::setw(15) << "Entries" << std::setw(15)
            << "Max baskets" << std::setw(15) << "Bytes" << std::endl;

  // Names of branches whose baskets do not align with cluster boundaries
  std::set<std::string_view> misaligned;

  // One row per cluster
  for (Long64_t first = clusterIter(); first < nentries; first = clusterIter()) {
    Long64_t const last = clusterIter.GetNextEntry();
    Long64_t totalBytes = 0;
    unsigned int mostBaskets = 0;
    for (auto &state : branchStates) {
      // Branches already found to be misaligned no longer contribute
      if (not state.isAlignedWithClusterBoundaries()) {
        continue;
      }
      auto const [nBytes, nBaskets] = state.bytesInNextCluster(first, last, misaligned);
      totalBytes += nBytes;
      if (nBaskets > mostBaskets) {
        mostBaskets = nBaskets;
      }
    }
    std::cout << std::setw(15) << first << std::setw(15) << last << std::setw(15) << (last - first) << std::setw(15)
              << mostBaskets << std::setw(15) << totalBytes << std::endl;
  }

  if (misaligned.empty()) {
    return;
  }

  std::cout << "\nThe following branches had baskets whose entry boundaries did not align with the cluster "
               "boundaries. Their baskets are excluded from the cluster size calculation above starting from the "
               "first basket that did not align with a cluster boundary."
            << std::endl;
  for (auto const &branchName : misaligned) {
    std::cout << " " << branchName << std::endl;
  }
}
225+
100226
std::string getUuid(TTree *uuidTree) {
101227
FileID fid;
102228
FileID *fidPtr = &fid;

IOPool/Common/bin/CollUtil.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@ namespace edm {
1515
Long64_t numEntries(TFile *hdl, const std::string &trname);
1616
void printBranchNames(TTree *tree);
1717
void longBranchPrint(TTree *tr);
18+
void clusterPrint(TTree *tr, bool isEventsTree);
1819
std::string getUuid(TTree *uuidTree);
1920
void printUuids(TTree *uuidTree);
2021
void printEventLists(TFile *tfl);

IOPool/Common/bin/EdmFileUtil.cpp

Lines changed: 16 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,10 @@ int main(int argc, char* argv[]) {
4242
"JSON,j", "JSON output format. Any arguments listed below are ignored")("ls,l", "list file content")(
4343
"map,m", "Print TFile::Map(\"extended\"). The output can be HUGE.")("print,P", "Print all")(
4444
"verbose,v", "Verbose printout")("printBranchDetails,b", "Call Print()sc for all branches")(
45-
"tree,t", boost::program_options::value<std::string>(), "Select tree used with -P and -b options")(
45+
"printClusters", "Print detailed information about baskets and clusters for all branches")(
46+
"tree,t",
47+
boost::program_options::value<std::string>(),
48+
"Select tree used with -P, -b, and --printClusters options")(
4649
"events,e",
4750
"Print list of all Events, Runs, and LuminosityBlocks in the file sorted by run number, luminosity block number, "
4851
"and event number. Also prints the entry numbers and whether it is possible to use fast copy with the file.")(
@@ -110,6 +113,7 @@ int main(int argc, char* argv[]) {
110113
bool tree = more && (vm.count("tree") > 0 ? true : false);
111114
bool print = more && (vm.count("print") > 0 ? true : false);
112115
bool printBranchDetails = more && (vm.count("printBranchDetails") > 0 ? true : false);
116+
bool printClusters = more && (vm.count("printClusters") > 0 ? true : false);
113117
bool onlyDecodeLFN =
114118
decodeLFN && !(uuid || adler32 || allowRecovery || json || events || tree || ls || print || printBranchDetails);
115119
std::string selectedTree = tree ? vm["tree"].as<std::string>() : edm::poolNames::eventTreeName();
@@ -274,6 +278,17 @@ int main(int argc, char* argv[]) {
274278
edm::longBranchPrint(printTree);
275279
}
276280

281+
if (printClusters) {
282+
bool const isEventsTree = (selectedTree == edm::BranchTypeToProductTreeName(edm::InEvent));
283+
TTree* printTree = (TTree*)tfile->Get(selectedTree.c_str());
284+
if (printTree == nullptr) {
285+
std::cout << "Tree " << selectedTree << " appears to be missing. Could not find it in the file.\n";
286+
std::cout << "Exiting\n";
287+
return 1;
288+
}
289+
edm::clusterPrint(printTree, isEventsTree);
290+
}
291+
277292
// Print out event lists
278293
if (events) {
279294
edm::printEventLists(tfile.get());

0 commit comments

Comments
 (0)