EIC Software
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
compareRootFiles.C
Go to the documentation of this file. Or view the newest version in sPHENIX GitHub for file compareRootFiles.C
1 // This file is part of the Acts project.
2 //
3 // Copyright (C) 2017 CERN for the benefit of the Acts project
4 //
5 // This Source Code Form is subject to the terms of the Mozilla Public
6 // License, v. 2.0. If a copy of the MPL was not distributed with this
7 // file, You can obtain one at http://mozilla.org/MPL/2.0/.
8 
9 // This ROOT script compares two ROOT files in an order-insensitive way. Its
10 // intended use is to compare the output of single-threaded and multi-threaded
11 // programs in order to check that results are perfectly reproducible.
12 //
13 // As a current limitation, which may be lifted in the future, the script does
14 // all of its processing in RAM, which means that the input dataset must fit in
15 // RAM. So do not try to run this on terabytes of data. You don't need that much
16 // data to check that your multithreaded program runs well anyhow.
17 //
18 // Another limitation is that the comparison relies on perfect output
19 // reproducibility, which is a very costly guarantee to achieve in a
20 // multi-threaded environment. If you want to compare "slightly different"
21 // outputs, this script will not work as currently written. I cannot think of a
22 // way in which imperfect reproducibility could be checked in a manner which
23 // doesn't depend on the details of the data being compared.
24 
25 #include <cstring>
26 #include <map>
27 #include <string>
28 #include <utility>
29 #include <vector>
30 
31 #include "TBranch.h"
32 #include "TFile.h"
33 #include "TKey.h"
34 #include "TList.h"
35 #include "TObject.h"
36 #include "TTree.h"
37 #include "TTreeReader.h"
38 
39 #include "compareRootFiles.hpp"
40 
// Minimal mechanism for assertion checking and comparison.
//
// All three macros make the *enclosing function* return 1 when the check
// fails, after streaming `msg` (which may itself be a chain of `<<` operands)
// to std::cout. They must therefore only be used inside functions returning
// an integer, and must be followed by a semicolon like a regular statement.
//
// The bodies are wrapped in do { ... } while (0) so that each macro expands to
// exactly one statement. This keeps constructs such as
// `if (x) CHECK(...); else ...` well-formed and prevents a following `else`
// from binding to the macro's internal `if`.

// Fails (prints msg, returns 1) if `pred` evaluates to false.
#define CHECK(pred, msg)             \
  do {                               \
    if (!(pred)) {                   \
      std::cout << msg << std::endl; \
      return 1;                      \
    }                                \
  } while (0)

// Fails if `v1 == v2` is false. Both values are appended to the message, so
// they are evaluated a second time on failure and must be streamable.
#define CHECK_EQUAL(v1, v2, msg) \
  CHECK((v1) == (v2), msg << "(" << (v1) << " vs " << (v2) << ") ")

// Fails if the two C strings differ according to strcmp(). Both strings are
// appended to the message, so they are evaluated a second time on failure.
#define CHECK_STR_EQUAL(s1, s2, msg) \
  CHECK(strcmp((s1), (s2)) == 0, msg << " (" << (s1) << " vs " << (s2) << ") ")
53 
54 // This script returns 0 if the files have identical contents except for event
55 // ordering, and a nonzero result if the contents differ or an error occured.
56 //
57 // If the optional dump_data_on_failure flag is set, it will also dump the
58 // mismatching event data to stdout on failure for manual inspection.
59 //
60 // If the optional skip_unsupported_branches flag is set, the script will ignore
61 // unsupported branch types in the input file instead of aborting.
62 //
63 
64 int
65 compareRootFiles(std::string file1,
66  std::string file2,
67  bool dump_data_on_failure = false,
68  bool skip_unsupported_branches = false)
69 {
70  std::cout << "Comparing ROOT files " << file1 << " and " << file2
71  << std::endl;
72 
73  std::cout << "* Opening the files..." << std::endl;
74  HomogeneousPair<TFile> files{file1.c_str(), file2.c_str()};
75  if (files.first.IsZombie()) {
76  std::cout << " - Could not open file " << file1 << "!" << std::endl;
77  return 2;
78  } else if (files.second.IsZombie()) {
79  std::cout << " - Could not open file " << file2 << "!" << std::endl;
80  return 2;
81  }
82 
83  std::cout << "* Extracting file keys..." << std::endl;
85  {
86  // This is how we would extract keys from one file
87  const auto loadKeys = [](const TFile& file, std::vector<TKey*>& target) {
88  const int keyCount = file.GetNkeys();
89  target.reserve(keyCount);
90  TIter keyIter{file.GetListOfKeys()};
91  for (int i = 0; i < keyCount; ++i) {
92  target.emplace_back(dynamic_cast<TKey*>(keyIter()));
93  }
94  };
95 
96  // Do it for each of our files
97  loadKeys(files.first, fileKeys.first);
98  loadKeys(files.second, fileKeys.second);
99  }
100 
101  std::cout << "* Selecting the latest key cycle..." << std::endl;
102  std::vector<HomogeneousPair<TKey*>> keyPairs;
103  {
104  // For each file and for each key name, we want to know what is the latest
105  // key cycle, and who is the associated key object
106  using KeyMetadata = std::pair<short, TKey*>;
107  using FileMetadata = std::map<std::string, KeyMetadata>;
109 
110  // This is how we compute this metadata for a single file
111  const auto findLatestCycle
112  = [](const std::vector<TKey*>& keys, FileMetadata& target) {
113  // Iterate through the file's keys
114  for (const auto key : keys) {
115  // Extract information about the active key
116  const std::string keyName{key->GetName()};
117  const short newCycle{key->GetCycle()};
118 
119  // Do we already know of a key with the same name?
120  auto latestCycleIter = target.find(keyName);
121  if (latestCycleIter != target.end()) {
122  // If so, keep the key with the most recent cycle number
123  auto& latestCycleMetadata = latestCycleIter->second;
124  if (newCycle > latestCycleMetadata.first) {
125  latestCycleMetadata = {newCycle, key};
126  }
127  } else {
128  // If not, this is obviously the most recent key we've seen so
129  // far
130  target.emplace(keyName, KeyMetadata{newCycle, key});
131  }
132  }
133  };
134 
135  // We'll compute this information for both of our files...
136  std::cout << " - Finding the latest cycle for each file..." << std::endl;
137  findLatestCycle(fileKeys.first, metadata.first);
138  findLatestCycle(fileKeys.second, metadata.second);
139 
140  // ...and then we'll group the latest keys by name, detect keys which only
141  // exist in a single file along the way, and report that as an error
142  std::cout << " - Grouping per-file latest keys..." << std::endl;
143  {
144  // Make sure that both files have the same amount of keys once duplicate
145  // versions are removed
146  const auto f1KeyCount = metadata.first.size();
147  const auto f2KeyCount = metadata.second.size();
148  CHECK_EQUAL(
149  f1KeyCount, f2KeyCount, " o Number of keys does not match");
150  keyPairs.reserve(f1KeyCount);
151 
152  // Iterate through the keys, in the same order (as guaranteed by std::map)
153  for (auto f1MetadataIter = metadata.first.cbegin(),
154  f2MetadataIter = metadata.second.cbegin();
155  f1MetadataIter != metadata.first.cend();
156  ++f1MetadataIter, ++f2MetadataIter) {
157  // Do the keys have the same name?
158  const auto& f1KeyName = f1MetadataIter->first;
159  const auto& f2KeyName = f2MetadataIter->first;
160  CHECK_EQUAL(f1KeyName, f2KeyName, " o Key names do not match");
161 
162  // If so, extract the associated key pair
163  keyPairs.emplace_back(f1MetadataIter->second.second,
164  f2MetadataIter->second.second);
165  }
166  }
167  }
168 
169  std::cout << "* Comparing key metadata..." << std::endl;
170  for (const auto& keyPair : keyPairs) {
171  const auto& key1 = keyPair.first;
172  const auto& key2 = keyPair.second;
173 
174  CHECK_STR_EQUAL(key1->GetClassName(),
175  key2->GetClassName(),
176  " - Class name does not match!");
178  key1->GetTitle(), key2->GetTitle(), " - Title does not match!");
179  CHECK_EQUAL(key1->GetVersion(),
180  key2->GetVersion(),
181  " - Key version does not match!");
182  }
183 
184  // NOTE: The current version of this script only supports TTree file contents.
185  // It may be extended later if the need for other data formats arise.
186  std::cout << "* Extracting TTrees..." << std::endl;
187  std::vector<HomogeneousPair<TTree*>> treePairs;
188  for (const auto& keyPair : keyPairs) {
189  TObject* obj1 = keyPair.first->ReadObj();
190  TObject* obj2 = keyPair.second->ReadObj();
191 
192  CHECK_STR_EQUAL(obj1->ClassName(),
193  obj2->ClassName(),
194  " - Object type does not match!");
196  obj1->ClassName(), "TTree", " - Non-TTree input is not supported!");
197 
198  treePairs.emplace_back(dynamic_cast<TTree*>(obj1),
199  dynamic_cast<TTree*>(obj2));
200  }
201 
202  std::cout << "* Comparing the trees..." << std::endl;
203  for (const auto& treePair : treePairs) {
204  const auto& tree1 = treePair.first;
205  const auto& tree2 = treePair.second;
206 
207  std::cout << " - Comparing tree " << tree1->GetName() << "..."
208  << std::endl;
209 
210  std::cout << " o Comparing tree-wide metadata..." << std::endl;
211  const std::size_t t1EntryCount = tree1->GetEntries();
212  {
213  const std::size_t t2EntryCount = tree2->GetEntries();
214  CHECK_EQUAL(t1EntryCount,
215  t2EntryCount,
216  " ~ Number of entries does not match!");
217  }
218 
219  if (t1EntryCount == 0) {
220  std::cout << " o Skipping empty tree!" << std::endl;
221  continue;
222  }
223 
224  std::cout << " o Preparing for tree readout..." << std::endl;
225  TTreeReader t1Reader(tree1);
226  TTreeReader t2Reader(tree2);
228  t1Reader, t2Reader, t1EntryCount};
229 
230  std::cout << " o Comparing branch metadata..." << std::endl;
231  std::vector<HomogeneousPair<TBranch*>> branchPairs;
232  {
233  // Check number of branches and allocate branch storage
234  const int t1BranchCount = tree1->GetNbranches();
235  const int t2BranchCount = tree2->GetNbranches();
236  CHECK_EQUAL(t1BranchCount,
237  t2BranchCount,
238  " ~ Number of branches does not match!");
239  branchPairs.reserve(t1BranchCount);
240 
241  // Extract branches using TTree::GetListOfBranches()
242  TIter t1BranchIter{tree1->GetListOfBranches()};
243  TIter t2BranchIter{tree2->GetListOfBranches()};
244  for (int i = 0; i < t1BranchCount; ++i) {
245  branchPairs.emplace_back(dynamic_cast<TBranch*>(t1BranchIter()),
246  dynamic_cast<TBranch*>(t2BranchIter()));
247  }
248  }
249 
250  std::cout << " o Setting up branch-specific processing..." << std::endl;
251  std::vector<BranchComparisonHarness> branchComparisonHarnesses;
252  branchComparisonHarnesses.reserve(branchPairs.size());
253  for (const auto& branchPair : branchPairs) {
254  const auto& branch1 = branchPair.first;
255  const auto& branch2 = branchPair.second;
256 
257  std::cout << " ~ Checking branch metadata..." << std::endl;
258  std::string b1ClassName, b1BranchName;
259  EDataType b1DataType;
260  {
261  std::string b2ClassName, b2BranchName;
262  EDataType b2DataType;
263  TClass* unused;
264 
265  b1ClassName = branch1->GetClassName();
266  b2ClassName = branch2->GetClassName();
267  CHECK_EQUAL(
268  b1ClassName, b2ClassName, " + Class name does not match!");
269  branch1->GetExpectedType(unused, b1DataType);
270  branch2->GetExpectedType(unused, b2DataType);
271  CHECK_EQUAL(
272  b1DataType, b2DataType, " + Raw data type does not match!");
273  const int b1LeafCount = branch1->GetNleaves();
274  const int b2LeafCount = branch2->GetNleaves();
275  CHECK_EQUAL(b1LeafCount,
276  b2LeafCount,
277  " + Number of leaves does not match!");
278  CHECK_EQUAL(
279  b1LeafCount,
280  1,
281  " + Branches with several leaves are not supported!");
282  b1BranchName = branch1->GetName();
283  b2BranchName = branch2->GetName();
284  CHECK_EQUAL(b1BranchName,
285  b2BranchName,
286  " + Branch name does not match!");
287  }
288 
289  std::cout << " ~ Building comparison harness for branch "
290  << b1BranchName << "..." << std::endl;
291  try {
292  auto branchHarness = BranchComparisonHarness::create(
293  treeMetadata, b1BranchName, b1DataType, b1ClassName);
294  branchComparisonHarnesses.emplace_back(std::move(branchHarness));
296  // When encountering an unsupported branch type, we can either skip
297  // the branch or abort depending on configuration
298  std::cout << " + Unsupported branch type! "
299  << "(eDataType: " << b1DataType << ", ClassName: \""
300  << b1ClassName << "\")" << std::endl;
301  if (skip_unsupported_branches) {
302  continue;
303  } else {
304  return 3;
305  }
306  }
307  }
308 
309  std::cout << " o Reading event data..." << std::endl;
310  for (std::size_t i = 0; i < t1EntryCount; ++i) {
311  // Move to the next TTree entry (= next event)
312  t1Reader.Next();
313  t2Reader.Next();
314 
315  // Load the data associated with each branch
316  for (auto& branchHarness : branchComparisonHarnesses) {
317  branchHarness.loadCurrentEvent();
318  }
319  }
320 
321  std::cout << " o Sorting the first tree..." << std::endl;
322  {
323  std::cout << " ~ Defining event comparison operator..." << std::endl;
324  IndexComparator t1CompareEvents
325  = [&branchComparisonHarnesses](std::size_t i,
326  std::size_t j) -> Ordering {
327  for (auto& branchHarness : branchComparisonHarnesses) {
328  const auto order = branchHarness.sortHarness.first.first(i, j);
329  if (order != Ordering::EQUAL) { return order; }
330  }
331  return Ordering::EQUAL;
332  };
333 
334  std::cout << " ~ Defining event swapping operator..." << std::endl;
335  IndexSwapper t1SwapEvents
336  = [&branchComparisonHarnesses](std::size_t i, std::size_t j) {
337  for (auto& branchHarness : branchComparisonHarnesses) {
338  branchHarness.sortHarness.first.second(i, j);
339  }
340  };
341 
342  std::cout << " ~ Running quicksort on the tree..." << std::endl;
343  quickSort(0, t1EntryCount - 1, t1CompareEvents, t1SwapEvents);
344  }
345 
346  std::cout << " o Sorting the second tree..." << std::endl;
347  {
348  std::cout << " ~ Defining event comparison operator..." << std::endl;
349  IndexComparator t2CompareEvents
350  = [&branchComparisonHarnesses](std::size_t i,
351  std::size_t j) -> Ordering {
352  for (auto& branchHarness : branchComparisonHarnesses) {
353  const auto order = branchHarness.sortHarness.second.first(i, j);
354  if (order != Ordering::EQUAL) { return order; }
355  }
356  return Ordering::EQUAL;
357  };
358 
359  std::cout << " ~ Defining event swapping operator..." << std::endl;
360  IndexSwapper t2SwapEvents
361  = [&branchComparisonHarnesses](std::size_t i, std::size_t j) {
362  for (auto& branchHarness : branchComparisonHarnesses) {
363  branchHarness.sortHarness.second.second(i, j);
364  }
365  };
366 
367  std::cout << " ~ Running quicksort on the tree..." << std::endl;
368  quickSort(0, t1EntryCount - 1, t2CompareEvents, t2SwapEvents);
369  }
370 
371  std::cout << " o Checking that both trees are now equal..." << std::endl;
372  for (auto& branchHarness : branchComparisonHarnesses) {
373  std::cout << " ~ Comparing branch " << branchHarness.branchName
374  << "..." << std::endl;
375  if (!branchHarness.eventDataEqual()) {
376  std::cout << " + Branch contents do not match!" << std::endl;
377  if (dump_data_on_failure) {
378  std::cout << " + Dumping branch contents:" << std::endl;
379  branchHarness.dumpEventData();
380  }
381  return 4;
382  }
383  }
384  }
385 
386  std::cout << "* Input files are equal, event order aside!" << std::endl;
387  return 0;
388 }