EIC Software
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
compareRootFiles.hpp
Go to the documentation of this file. Or view the newest version in sPHENIX GitHub for file compareRootFiles.hpp
1 // This file is part of the Acts project.
2 //
3 // Copyright (C) 2017 CERN for the benefit of the Acts project
4 //
5 // This Source Code Form is subject to the terms of the Mozilla Public
6 // License, v. 2.0. If a copy of the MPL was not distributed with this
7 // file, You can obtain one at http://mozilla.org/MPL/2.0/.
8 
9 #pragma once
10 
11 #include <cmath>
12 #include <exception>
13 #include <functional>
14 #include <sstream>
15 #include <vector>
16 
17 #include "TDictionary.h"
18 #include "TTreeReaderValue.h"
19 
20 // Pairs of elements of the same type
21 template <typename T>
22 using HomogeneousPair = std::pair<T, T>;
23 
24 // === TYPE ERASURE FOR CONCRETE DATA ===
25 
26 // Minimal type-erasure wrapper for std::vector<T>. This will be used as a
27 // workaround to compensate for the absence of C++17's std::any in Cling.
28 class AnyVector {
29  public:
30  // Create a type-erased vector<T>, using proposed constructor arguments.
31  // Returns a pair containing the type-erased vector and a pointer to the
32  // underlying concrete vector.
33  template <typename T, typename... Args>
34  static std::pair<AnyVector, std::vector<T>*> create(Args&&... args) {
35  std::vector<T>* vector = new std::vector<T>(std::forward<Args>(args)...);
36  std::function<void()> deleter = [vector] { delete vector; };
37  return std::make_pair(
38  AnyVector{static_cast<void*>(vector), std::move(deleter)}, vector);
39  }
40 
41  // Default-construct a null type-erased vector
42  AnyVector() : m_vector{nullptr} {}
43 
44  // Move-construct a type-erased vector
45  AnyVector(AnyVector&& other)
46  : m_vector{other.m_vector}, m_deleter{std::move(other.m_deleter)} {
47  other.m_vector = nullptr;
48  }
49 
50  // Move-assign a type-erased vector
51  AnyVector& operator=(AnyVector&& other) {
52  if (&other != this) {
53  m_vector = other.m_vector;
54  m_deleter = std::move(other.m_deleter);
55  other.m_vector = nullptr;
56  }
57  return *this;
58  }
59 
60  // Forbid copies of type-erased vectors
61  AnyVector(const AnyVector&) = delete;
62  AnyVector& operator=(const AnyVector&) = delete;
63 
64  // Delete a type-erased vector
66  if (m_vector)
67  m_deleter();
68  }
69 
70  private:
71  // Construct a type-erased vector from a concrete vector
72  AnyVector(void* vector, std::function<void()>&& deleter)
73  : m_vector{vector}, m_deleter{std::move(deleter)} {}
74 
75  void* m_vector; // Casted std::vector<T>*
76  std::function<void()> m_deleter; // Deletes the underlying vector
77 };
78 
79 // === GENERIC DATA ORDERING ===
80 
81 // We want to check, in a single operation, how two pieces of data are ordered
82 enum class Ordering { SMALLER, EQUAL, GREATER };
83 
84 // In general, any type which implements comparison operators that behave as a
85 // mathematical total order can use this comparison function...
86 template <typename T>
87 Ordering compare(const T& x, const T& y) {
88  if (x < y) {
89  return Ordering::SMALLER;
90  } else if (x == y) {
91  return Ordering::EQUAL;
92  } else {
93  return Ordering::GREATER;
94  }
95 }
96 
97 // ...but we'll want to tweak that a little for floats, to handle NaNs better...
98 template <typename T>
99 Ordering compareFloat(const T& x, const T& y) {
100  if (std::isless(x, y)) {
101  return Ordering::SMALLER;
102  } else if (std::isgreater(x, y)) {
103  return Ordering::GREATER;
104  } else {
105  return Ordering::EQUAL;
106  }
107 }
108 
109 template <>
110 Ordering compare(const float& x, const float& y) {
111  return compareFloat(x, y);
112 }
113 
114 template <>
115 Ordering compare(const double& x, const double& y) {
116  return compareFloat(x, y);
117 }
118 
119 // ...and for vectors, where the default lexicographic comparison cannot
120 // efficiently tell all of what we want in a single vector iteration pass.
121 template <typename U>
122 Ordering compare(const std::vector<U>& v1, const std::vector<U>& v2) {
123  // First try to order by size...
124  if (v1.size() < v2.size()) {
125  return Ordering::SMALLER;
126  } else if (v1.size() > v2.size()) {
127  return Ordering::GREATER;
128  }
129  // ...if the size is identical...
130  else {
131  // ...then try to order by contents of increasing index...
132  for (std::size_t i = 0; i < v1.size(); ++i) {
133  if (v1[i] < v2[i]) {
134  return Ordering::SMALLER;
135  } else if (v1[i] > v2[i]) {
136  return Ordering::GREATER;
137  }
138  }
139 
140  // ...and declare the vectors equal if the contents are equal
141  return Ordering::EQUAL;
142  }
143 }
144 
145 // === GENERIC SORTING MECHANISM ===
146 
147 // The following functions are generic implementations of sorting algorithms,
148 // which require only a comparison operator, a swapping operator, and an
149 // inclusive range of indices to be sorted in order to operate
150 using IndexComparator = std::function<Ordering(std::size_t, std::size_t)>;
151 using IndexSwapper = std::function<void(std::size_t, std::size_t)>;
152 
153 // Selection sort has pertty bad asymptotic scaling, but it is non-recursive
154 // and in-place, which makes it a good choice for smaller inputs
155 void selectionSort(const std::size_t firstIndex, const std::size_t lastIndex,
156  const IndexComparator& compare, const IndexSwapper& swap) {
157  using namespace std;
158  for (size_t targetIndex = firstIndex; targetIndex < lastIndex;
159  ++targetIndex) {
160  size_t minIndex = targetIndex;
161  for (std::size_t readIndex = targetIndex + 1; readIndex <= lastIndex;
162  ++readIndex) {
163  if (compare(readIndex, minIndex) == Ordering::SMALLER) {
164  minIndex = readIndex;
165  }
166  }
167  if (minIndex != targetIndex)
168  swap(minIndex, targetIndex);
169  }
170 }
171 
172 // Quick sort is used as the top-level sorting algorithm for our datasets
173 void quickSort(const std::size_t firstIndex, const std::size_t lastIndex,
174  const IndexComparator& compare, const IndexSwapper& swap) {
175  // We switch to non-recursive selection sort when the range becomes too small.
176  // This optimization voids the need for detection of 0- and 1-element input.
177  static const std::size_t NON_RECURSIVE_THRESHOLD = 25;
178  if (lastIndex - firstIndex < NON_RECURSIVE_THRESHOLD) {
179  selectionSort(firstIndex, lastIndex, compare, swap);
180  return;
181  }
182 
183  // We'll use the midpoint as a pivot. Later on, we can switch to more
184  // elaborate pivot selection schemes if their usefulness for our use case
185  // (pseudorandom events with thread-originated reordering) is demonstrated.
186  std::size_t pivotIndex = firstIndex + (lastIndex - firstIndex) / 2;
187 
188  // Partition the data around the pivot using Hoare's scheme
189  std::size_t splitIndex;
190  {
191  // Start with two indices one step beyond each side of the array
192  std::size_t i = firstIndex - 1;
193  std::size_t j = lastIndex + 1;
194  while (true) {
195  // Move left index forward at least once, and until an element which is
196  // greater than or equal to the pivot is detected.
197  do {
198  i = i + 1;
199  } while (compare(i, pivotIndex) == Ordering::SMALLER);
200 
201  // Move right index backward at least once, and until an element which is
202  // smaller than or equal to the pivot is detected
203  do {
204  j = j - 1;
205  } while (compare(j, pivotIndex) == Ordering::GREATER);
206 
207  // By transitivity of inequality, the element at location i is greater
208  // than or equal to the one at location j, and a swap could be required
209  if (i < j) {
210  // These elements are in the wrong order, swap them
211  swap(i, j);
212 
213  // Don't forget to keep track the pivot's index along the way, as this
214  // is currently the only way by which we can refer to the pivot element.
215  if (i == pivotIndex) {
216  pivotIndex = j;
217  } else if (j == pivotIndex) {
218  pivotIndex = i;
219  }
220  } else {
221  // If i and j went past each other, our partitioning is done
222  splitIndex = j;
223  break;
224  }
225  }
226  }
227 
228  // Now, we'll recursively sort both partitions using quicksort. We should
229  // recurse in the smaller range first, so as to leverage compiler tail call
230  // optimization if available.
231  if (splitIndex - firstIndex <= lastIndex - splitIndex - 1) {
232  quickSort(firstIndex, splitIndex, compare, swap);
233  quickSort(splitIndex + 1, lastIndex, compare, swap);
234  } else {
235  quickSort(splitIndex + 1, lastIndex, compare, swap);
236  quickSort(firstIndex, splitIndex, compare, swap);
237  }
238 }
239 
240 // === GENERIC TTREE BRANCH MANIPULATION MECHANISM ===
241 
242 // When comparing a pair of TTrees, we'll need to set up quite a few facilities
243 // for each branch. Since this setup is dependent on the branch data type, which
244 // is only known at runtime, it is quite involved, which is why we extracted it
245 // to a separate struct and its constructor.
247  // We'll keep track of the branch name for debugging purposes
248  std::string branchName;
249 
250  // Type-erased event data for the current branch, in both trees being compared
252 
253  // Function which loads the active event data for the current branch. This is
254  // to be performed for each branch and combined with TTreeReader-based event
255  // iteration on both trees.
256  void loadCurrentEvent() { (*m_eventLoaderPtr)(); }
257 
258  // Functors which compare two events within a given tree and order them
259  // with respect to one another, and which swap two events. By combining such
260  // functionality for each branch, a global tree order can be produced.
262 
263  // Functor which compares the current event data in *both* trees and tells
264  // whether it is identical. The comparison is order-sensitive, so events
265  // should previously have been sorted in a canonical order in both trees.
266  // By combining the results for each branch, global tree equality is defined.
267  using TreeComparator = std::function<bool()>;
269 
270  // Functor which dumps the event data for the active event side by side, in
271  // two columns. This enables manual comparison during debugging.
272  std::function<void()> dumpEventData;
273 
274  // General metadata about the tree which is identical for every branch
275  struct TreeMetadata {
276  TTreeReader& tree1Reader;
277  TTreeReader& tree2Reader;
278  const std::size_t entryCount;
279  };
280 
281  // This exception will be thrown if an unsupported branch type is encountered
282  class UnsupportedBranchType : public std::exception {};
283 
284  // Type-erased factory of branch comparison harnesses, taking ROOT run-time
285  // type information as input in order to select an appropriate C++ constructor
287  const std::string& branchName,
288  const EDataType dataType,
289  const std::string& className) {
290  switch (dataType) {
291  case kChar_t:
292  return BranchComparisonHarness::create<char>(treeMetadata, branchName);
293  case kUChar_t:
294  return BranchComparisonHarness::create<unsigned char>(treeMetadata,
295  branchName);
296  case kShort_t:
297  return BranchComparisonHarness::create<short>(treeMetadata, branchName);
298  case kUShort_t:
299  return BranchComparisonHarness::create<unsigned short>(treeMetadata,
300  branchName);
301  case kInt_t:
302  return BranchComparisonHarness::create<int>(treeMetadata, branchName);
303  case kUInt_t:
304  return BranchComparisonHarness::create<unsigned int>(treeMetadata,
305  branchName);
306  case kLong_t:
307  return BranchComparisonHarness::create<long>(treeMetadata, branchName);
308  case kULong_t:
309  return BranchComparisonHarness::create<unsigned long>(treeMetadata,
310  branchName);
311  case kULong64_t:
312  return BranchComparisonHarness::create<unsigned long long>(treeMetadata,
313  branchName);
314 
315  case kFloat_t:
316  return BranchComparisonHarness::create<float>(treeMetadata, branchName);
317  case kDouble_t:
318  return BranchComparisonHarness::create<double>(treeMetadata,
319  branchName);
320  case kBool_t:
321  return BranchComparisonHarness::create<bool>(treeMetadata, branchName);
322  case kOther_t:
323  if (className.substr(0, 6) == "vector") {
324  std::string elementType = className.substr(7, className.size() - 8);
325  return BranchComparisonHarness::createVector(treeMetadata, branchName,
326  std::move(elementType));
327  } else {
328  throw UnsupportedBranchType();
329  }
330  default:
331  throw UnsupportedBranchType();
332  }
333  }
334 
335  private:
336  // Under the hood, the top-level factory calls the following function
337  // template, parametrized with the proper C++ data type
338  template <typename T>
340  const std::string& branchName) {
341  // Our result will eventually go there
343 
344  // Save the branch name for debugging purposes
345  result.branchName = branchName;
346 
347  // Setup type-erased event data storage
348  auto tree1DataStorage = AnyVector::create<T>();
349  auto tree2DataStorage = AnyVector::create<T>();
350  result.eventData = std::make_pair(std::move(tree1DataStorage.first),
351  std::move(tree2DataStorage.first));
352  std::vector<T>& tree1Data = *tree1DataStorage.second;
353  std::vector<T>& tree2Data = *tree2DataStorage.second;
354 
355  // Use our advance knowledge of the event count to preallocate storage
356  tree1Data.reserve(treeMetadata.entryCount);
357  tree2Data.reserve(treeMetadata.entryCount);
358 
359  // Setup event data readout
360  result.m_eventLoaderPtr.reset(
361  new EventLoaderT<T>{treeMetadata.tree1Reader, treeMetadata.tree2Reader,
362  branchName, tree1Data, tree2Data});
363 
364  // Setup event comparison and swapping for each tree
365  result.sortHarness = std::make_pair(
366  std::make_pair(
367  [&tree1Data](std::size_t i, std::size_t j) -> Ordering {
368  return compare(tree1Data[i], tree1Data[j]);
369  },
370  [&tree1Data](std::size_t i, std::size_t j) {
371  std::swap(tree1Data[i], tree1Data[j]);
372  }),
373  std::make_pair(
374  [&tree2Data](std::size_t i, std::size_t j) -> Ordering {
375  return compare(tree2Data[i], tree2Data[j]);
376  },
377  [&tree2Data](std::size_t i, std::size_t j) {
378  std::swap(tree2Data[i], tree2Data[j]);
379  }));
380 
381  // Setup order-sensitive tree comparison
382  result.eventDataEqual = [&tree1Data, &tree2Data]() -> bool {
383  for (std::size_t i = 0; i < tree1Data.size(); ++i) {
384  if (compare(tree1Data[i], tree2Data[i]) != Ordering::EQUAL)
385  return false;
386  }
387  return true;
388  };
389 
390  // Add a debugging method to dump event data
391  result.dumpEventData = [&tree1Data, &tree2Data] {
392  std::cout << "File 1 \tFile 2" << std::endl;
393  for (std::size_t i = 0; i < tree1Data.size(); ++i) {
394  std::cout << toString(tree1Data[i]) << " \t"
395  << toString(tree2Data[i]) << std::endl;
396  }
397  };
398 
399  // ...and we're good to go!
400  return std::move(result);
401  }
402 
403  // Because the people who created TTreeReaderValue could not bother to make it
404  // movable (for moving it into a lambda), or even just virtually destructible
405  // (for moving a unique_ptr into the lambda), loadEventData can only be
406  // implemented through lots of unpleasant C++98-ish boilerplate.
407  class IEventLoader {
408  public:
409  virtual ~IEventLoader() = default;
410  virtual void operator()() = 0;
411  };
412 
413  template <typename T>
414  class EventLoaderT : public IEventLoader {
415  public:
416  EventLoaderT(TTreeReader& tree1Reader, TTreeReader& tree2Reader,
417  const std::string& branchName, std::vector<T>& tree1Data,
418  std::vector<T>& tree2Data)
419  : branch1Reader{tree1Reader, branchName.c_str()},
420  branch2Reader{tree2Reader, branchName.c_str()},
421  branch1Data(tree1Data),
422  branch2Data(tree2Data) {}
423 
424  void operator()() final override {
425  branch1Data.push_back(*branch1Reader);
426  branch2Data.push_back(*branch2Reader);
427  }
428 
429  private:
430  TTreeReaderValue<T> branch1Reader, branch2Reader;
431  std::vector<T>& branch1Data;
432  std::vector<T>& branch2Data;
433  };
434 
435  std::unique_ptr<IEventLoader> m_eventLoaderPtr;
436 
437  // This helper factory helps building branches associated with std::vectors
438  // of data, which are the only STL collection that we support at the moment.
440  const std::string& branchName,
441  const std::string elemType) {
442 // We support vectors of different types by switching across type (strings)
443 #define CREATE_VECTOR__HANDLE_TYPE(type_name) \
444  if (elemType == #type_name) { \
445  return BranchComparisonHarness::create<std::vector<type_name>>( \
446  treeMetadata, branchName); \
447  }
448 
449  // Handle vectors of booleans
451 
452  // Handle vectors of all standard floating-point types
454  double)
455 
456 // For integer types, we'll want to handle both signed and unsigned versions
457 #define CREATE_VECTOR__HANDLE_INTEGER_TYPE(integer_type_name) \
458  CREATE_VECTOR__HANDLE_TYPE(integer_type_name) \
459  else CREATE_VECTOR__HANDLE_TYPE(unsigned integer_type_name)
460 
461  // Handle vectors of all standard integer types
464 
465  // Throw an exception if the vector element type is not recognized
466  else throw UnsupportedBranchType();
467  }
468 
469  // This helper method provides general string conversion for all supported
470  // branch event data types.
471  template <typename T>
472  static std::string toString(const T& data) {
473  std::ostringstream oss;
474  oss << data;
475  return oss.str();
476  }
477 
478  template <typename U>
479  static std::string toString(const std::vector<U>& vector) {
480  std::ostringstream oss{"{ "};
481  for (const auto& data : vector) {
482  oss << data << " \t";
483  }
484  oss << " }";
485  return oss.str();
486  }
487 };