EIC Software
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
dfe_io_dsv.hpp
Go to the documentation of this file. Or view the newest version in sPHENIX GitHub for file dfe_io_dsv.hpp
1 // SPDX-License-Identifier: MIT
2 // Copyright 2015,2018-2020 Moritz Kiehn
3 //
4 // Permission is hereby granted, free of charge, to any person obtaining a copy
5 // of this software and associated documentation files (the "Software"), to deal
6 // in the Software without restriction, including without limitation the rights
7 // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
8 // copies of the Software, and to permit persons to whom the Software is
9 // furnished to do so, subject to the following conditions:
10 //
11 // The above copyright notice and this permission notice shall be included in
12 // all copies or substantial portions of the Software.
13 //
14 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
17 // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
19 // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
20 // SOFTWARE.
21 
26 
27 #pragma once
28 
29 #include <algorithm>
30 #include <array>
31 #include <fstream>
32 #include <limits>
33 #include <sstream>
34 #include <stdexcept>
35 #include <string>
36 #include <tuple>
37 #include <type_traits>
38 #include <utility>
39 #include <vector>
40 
41 namespace dfe {
42 namespace io_dsv_impl {
43 
// Writer for text files whose columns are separated by `Delimiter`.
//
// The class is neither default-constructible nor copyable; move construction
// and move assignment are defaulted.
45 template<char Delimiter>
46 class DsvWriter {
47 public:
48  DsvWriter() = delete;
49  DsvWriter(const DsvWriter&) = delete;
50  DsvWriter(DsvWriter&&) = default;
51  ~DsvWriter() = default;
52  DsvWriter& operator=(const DsvWriter&) = delete;
53  DsvWriter& operator=(DsvWriter&&) = default;
54 
// Open `path` for writing and emit `columns` as the header row.
// `precision` is applied to the output stream and defaults to the number of
// digits needed to round-trip a double. The out-of-class definition throws
// std::runtime_error if the file can not be opened and std::invalid_argument
// if `columns` is empty.
60  DsvWriter(
61  const std::vector<std::string>& columns, const std::string& path,
62  int precision = std::numeric_limits<double>::max_digits10);
63 
// Append one row built from all arguments. The out-of-class definition
// throws std::invalid_argument if the number of written columns differs from
// the number of header columns, and std::runtime_error if writing fails.
72  template<typename Arg0, typename... Args>
73  void append(Arg0&& arg0, Args&&... args);
74 
75 private:
76  std::ofstream m_file;
// number of header columns; every appended row must match it
77  std::size_t m_num_columns;
78 
79  // enable_if to prevent this overload to be used for std::vector<T> as well
80  template<typename T>
81  static std::enable_if_t<
82  std::is_arithmetic<std::decay_t<T>>::value
// NOTE(review): listing line 83 is absent here. The condition above has no
// terminating comma, so the rest of the enable_if condition appears to have
// been lost in extraction - restore it from the original header.
84  unsigned>
85  write(T&& x, std::ostream& os);
// Overload for vectors; the definition returns the number of elements written.
86  template<typename T, typename Allocator>
87  static unsigned write(const std::vector<T, Allocator>& xs, std::ostream& os);
88 };
89 
/// Read delimiter-separated values from a text file, one line at a time.
///
/// \tparam Delimiter Character that separates the columns within a line
///
/// The reader can be moved but neither copied nor default-constructed.
template<char Delimiter>
class DsvReader {
public:
  DsvReader() = delete;
  DsvReader(const DsvReader&) = delete;
  DsvReader(DsvReader&&) = default;
  ~DsvReader() = default;
  DsvReader& operator=(const DsvReader&) = delete;
  DsvReader& operator=(DsvReader&&) = default;

  /// Open the file at the given path for reading.
  ///
  /// \param path Path to the input file
  /// \exception std::runtime_error if the file can not be opened
  DsvReader(const std::string& path);

  /// Read the next line and split it into its columns.
  ///
  /// \param[out] columns Cleared and refilled with the columns of the line
  /// \returns false once no further line is available
  /// \exception std::runtime_error if reading the line fails
  bool read(std::vector<std::string>& columns);

  /// Return the number of lines that have been read so far.
  std::size_t num_lines() const { return m_num_lines; }

private:
  std::ifstream m_file;
  // buffer that holds the most recently read line
  std::string m_line;
  std::size_t m_num_lines = 0;
};
120 
// Writer that stores named-tuple records as rows of a delimiter-separated
// file. The column names are taken from `NamedTuple::names()`.
//
// NOTE(review): this listing lost several lines in extraction (123, 127,
// 129-130, 132-136, 145 and 149). The class head, the constructor name, the
// end of the `append` call and the declaration of `m_writer` are among them;
// restore them from the original header rather than from this text.
122 template<char Delimiter, typename NamedTuple>
124 public:
125  NamedTupleDsvWriter() = delete;
126  NamedTupleDsvWriter(const NamedTupleDsvWriter&) = delete;
128  ~NamedTupleDsvWriter() = default;
131 
// Constructor parameters: the output path and the output precision, which
// defaults to the number of digits needed to round-trip a double. Both are
// handed to `m_writer` together with the record's column names.
137  const std::string& path,
138  int precision = std::numeric_limits<double>::max_digits10)
139  : m_writer(colum_names(), path, precision) {}
140 
// Append one record; forwards to append_impl with an index sequence.
142  void append(const NamedTuple& record) {
143  append_impl(
144  record, std::make_index_sequence<
// NOTE(review): listing line 145 (end of the index-sequence expression and of
// the call) is absent here.
146  }
147 
148 private:
150 
// Copy the names reported by the record type into a vector of strings.
151  static std::vector<std::string> colum_names() {
152  const auto& from_record = NamedTuple::names();
153  return {from_record.begin(), from_record.end()};
154  }
// Expand every tuple element into one argument of the underlying writer.
155  template<std::size_t... I>
156  void append_impl(const NamedTuple& values, std::index_sequence<I...>) {
// unqualified call so that a `get` found by argument-dependent lookup is usable
157  using std::get;
158  m_writer.append(get<I>(values)...);
159  }
160 };
161 
162 // string conversion helper functions
163 
/// Convert the textual content of a column into a value of type `T`.
///
/// \param[in]  str   Content of the column
/// \param[out] value Target of the formatted stream extraction
///
/// The conversion uses `operator>>` on a string stream. The stream state is
/// not inspected, so a failed conversion is not reported to the caller.
template<typename T>
static void
parse(const std::string& str, T& value) {
  // TODO use something w/ lower overhead than stringstream e.g. std::from_chars
  std::istringstream is(str);
  is >> value;
}

/// Take over the textual content of a column unchanged as a string value.
///
/// \param[in]  str   Content of the column
/// \param[out] value Receives a verbatim copy of the column content
///
/// Formatted extraction into a `std::string` skips leading whitespace and
/// stops at the first whitespace character, which would truncate a column
/// such as "hello world". String columns are therefore copied directly.
inline void
parse(const std::string& str, std::string& value) {
  value = str;
}
171 
// Reader that converts the rows of a delimiter-separated file into
// named-tuple records.
//
// NOTE(review): this listing lost several lines in extraction (183, 187,
// 189-190, 192-200, 231 and 236). The class head, the constructor name and
// the declarations of `m_reader` and `m_tuple_column_map` are among them;
// restore them from the original header rather than from this text.
// NOTE(review): SIZE_MAX is used below, but <cstdint> is not among the
// headers this file includes directly.
182 template<char Delimiter, typename NamedTuple>
184 public:
185  NamedTupleDsvReader() = delete;
186  NamedTupleDsvReader(const NamedTupleDsvReader&) = delete;
188  ~NamedTupleDsvReader() = default;
191 
// Constructor parameters: the input path, the names of tuple elements that
// may be absent from the file, and whether the header row is checked against
// the tuple names. The definition rejects optional columns when header
// verification is disabled.
201  const std::string& path,
202  const std::vector<std::string>& optional_columns = {},
203  bool verify_header = true);
204 
// Read the next record; the definition returns false when no line is left.
213  bool read(NamedTuple& record);
214 
// Read the next record and additionally convert all columns that do not
// belong to the tuple into `extra`.
219  template<typename T>
220  bool read(NamedTuple& record, std::vector<T>& extra);
221 
// Number of file columns that do not map to a tuple element.
223  std::size_t num_extra_columns() const { return m_extra_columns.size(); }
// Number of lines read so far minus one; the constructor always consumes the
// header line first, so the subtraction does not wrap.
225  std::size_t num_records() const { return m_reader.num_lines() - 1u; }
226 
227 private:
228  // the equivalent std::tuple-like type
229  using Tuple = typename NamedTuple::Tuple;
230 
// columns of the most recently read line
232  std::vector<std::string> m_columns;
233  // #columns is fixed to a reasonable value after reading the header
234  std::size_t m_num_columns = SIZE_MAX;
235  // map tuple index to column index in the file, SIZE_MAX for missing elements
// NOTE(review): listing line 236 with the member described above is absent.
237  // column indices that do not map to a tuple items
238  std::vector<std::size_t> m_extra_columns;
239 
240  void use_default_columns();
241  void parse_header(const std::vector<std::string>& optional_columns);
// Convert the current columns into all tuple elements of the record.
242  template<std::size_t... I>
243  void parse_record(NamedTuple& record, std::index_sequence<I...>) const {
244  // see namedtuple_impl::print_tuple for explanation
245  // allow different column ordering on file and optional columns
// the array initializer only serves to expand the pack in order; the array
// itself is discarded
246  using Vacuum = int[];
247  (void)Vacuum{(parse_element<I>(record), 0)...};
248  }
// Convert the column mapped to tuple element I; elements without a mapped
// column are left untouched.
249  template<std::size_t I>
250  void parse_element(NamedTuple& record) const {
251  using std::get;
252  if (m_tuple_column_map[I] != SIZE_MAX) {
253  parse(m_columns[m_tuple_column_map[I]], get<I>(record));
254  }
255  }
256 };
257 
258 // implementation writer
259 
// Constructor of DsvWriter: opens the output file, configures the number
// precision and writes the header row.
//
// Throws std::runtime_error if the file can not be opened and
// std::invalid_argument if `columns` is empty.
260 template<char Delimiter>
// NOTE(review): listing line 261 (the qualified constructor name that opens
// the parameter list) is absent here.
262  const std::vector<std::string>& columns, const std::string& path,
263  int precision)
264  : m_file(
265  path, std::ios_base::binary | std::ios_base::out | std::ios_base::trunc)
266  , m_num_columns(columns.size()) {
267  if (not m_file.is_open() or m_file.fail()) {
268  throw std::runtime_error("Could not open file '" + path + "'");
269  }
270  m_file.precision(precision);
// the file has already been opened with `trunc` by the member initializer
// when this check runs
271  if (m_num_columns == 0) {
272  throw std::invalid_argument("No columns were specified");
273  }
274  // write column names as header row
275  append(columns);
276 }
277 
278 template<char Delimiter>
279 template<typename Arg0, typename... Args>
280 inline void
281 DsvWriter<Delimiter>::append(Arg0&& arg0, Args&&... args) {
282  // we can only check how many columns were written after they have been
283  // written. write to temporary first to prevent bad data on file.
284  std::stringstream line;
285  // ensure consistent formatting
286  line.precision(m_file.precision());
287  unsigned written_columns[] = {
288  // write the first item without a delimiter and store columns written
289  write(std::forward<Arg0>(arg0), line),
290  // for all other items, write the delimiter followed by the item itself
291  // (<expr1>, <expr2>) use the comma operator (yep, ',' in c++ is a weird
292  // but helpful operator) to execute both expression and return the return
293  // value of the last one, i.e. here thats the number of columns written.
294  // the ... pack expansion creates this expression for all arguments
295  (line << Delimiter, write(std::forward<Args>(args), line))...,
296  };
297  line << '\n';
298  // validate that the total number of written columns matches the specs.
299  unsigned total_columns = 0;
300  for (auto nc : written_columns) {
301  total_columns += nc;
302  }
303  if (total_columns < m_num_columns) {
304  throw std::invalid_argument("Not enough columns");
305  }
306  if (m_num_columns < total_columns) {
307  throw std::invalid_argument("Too many columns");
308  }
309  // write the line to disk and check that it actually happened
310  m_file << line.rdbuf();
311  if (not m_file.good()) {
312  throw std::runtime_error("Could not write data to file");
313  }
314 }
315 
// Write a single value to the stream and report that one column was written.
316 template<char Delimiter>
317 template<typename T>
318 inline std::enable_if_t<
319  std::is_arithmetic<std::decay_t<T>>::value
// NOTE(review): listing line 320 is absent here. The condition above has no
// terminating comma, so the rest of the enable_if condition appears to have
// been lost in extraction - restore it from the original header.
321  unsigned>
322 DsvWriter<Delimiter>::write(T&& x, std::ostream& os) {
323  os << x;
324  return 1u;
325 }
326 
// Write all elements of a vector, separated by the delimiter, and return the
// number of elements written. An empty vector writes nothing and returns 0.
327 template<char Delimiter>
328 template<typename T, typename Allocator>
329 inline unsigned
// NOTE(review): listing line 330 (the qualified function name that opens the
// parameter list) is absent here.
331  const std::vector<T, Allocator>& xs, std::ostream& os) {
332  unsigned n = 0;
333  for (const auto& x : xs) {
// the delimiter goes between elements only, never before the first one
334  if (0 < n) {
335  os << Delimiter;
336  }
337  os << x;
338  n += 1;
339  }
340  return n;
341 }
342 
343 // implementation reader
344 
345 template<char Delimiter>
346 inline DsvReader<Delimiter>::DsvReader(const std::string& path)
347  : m_file(path, std::ios_base::binary | std::ios_base::in) {
348  if (not m_file.is_open() or m_file.fail()) {
349  throw std::runtime_error("Could not open file '" + path + "'");
350  }
351 }
352 
353 template<char Delimiter>
354 inline bool
355 DsvReader<Delimiter>::read(std::vector<std::string>& columns) {
356  // read the next line and check for both end-of-file and errors
357  std::getline(m_file, m_line);
358  if (m_file.eof()) {
359  return false;
360  }
361  if (m_file.fail()) {
362  throw std::runtime_error(
363  "Could not read line " + std::to_string(m_num_lines));
364  }
365  m_num_lines += 1;
366 
367  // split the line into columns
368  columns.clear();
369  for (std::string::size_type pos = 0; pos < m_line.size();) {
370  auto del = m_line.find_first_of(Delimiter, pos);
371  if (del == std::string::npos) {
372  // reached the end of the line; also determines the last column
373  columns.emplace_back(m_line, pos);
374  break;
375  } else {
376  columns.emplace_back(m_line, pos, del - pos);
377  // start next column search after the delimiter
378  pos = del + 1;
379  }
380  }
381  return true;
382 }
383 
384 // implementation named tuple reader
385 
// Constructor of NamedTupleDsvReader: opens the file through `m_reader` and
// consumes the first line as the header.
//
// Throws std::runtime_error if optional columns are requested without header
// verification or if the header line can not be read.
386 template<char Delimiter, typename NamedTuple>
// NOTE(review): listing line 387 (the qualified constructor name that opens
// the parameter list) is absent here.
388  const std::string& path, const std::vector<std::string>& optional_columns,
389  bool verify_header)
390  : m_reader(path) {
391  // optional columns only work if we verify the header
392  if ((not optional_columns.empty()) and (not verify_header)) {
393  throw std::runtime_error(
394  "Optional columns can not be used without header verification");
395  }
396  // first line is always the header
397  if (not m_reader.read(m_columns)) {
398  throw std::runtime_error("Could not read header from '" + path + "'");
399  }
400  if (verify_header) {
401  parse_header(optional_columns);
402  } else {
// NOTE(review): listing line 403 (the body of this else branch) is absent;
// presumably it sets up the default column mapping - confirm against the
// original header.
404  }
405 }
406 
// Read the next line and convert it into `record`.
//
// Returns false if the underlying reader has no further line. Throws
// std::runtime_error if the number of columns in the line differs from the
// expected number of columns.
407 template<char Delimiter, typename NamedTuple>
408 inline bool
// NOTE(review): listing line 409 (the qualified function name and its
// parameter list) is absent here.
410  if (not m_reader.read(m_columns)) {
411  return false;
412  }
413  // check for consistent entries per-line
414  if (m_columns.size() < m_num_columns) {
415  throw std::runtime_error(
416  "Too few columns in line " + std::to_string(m_reader.num_lines()));
417  }
418  if (m_num_columns < m_columns.size()) {
419  throw std::runtime_error(
420  "Too many columns in line " + std::to_string(m_reader.num_lines()));
421  }
422  // convert to tuple
423  parse_record(
424  record, std::make_index_sequence<std::tuple_size<Tuple>::value>{});
425  return true;
426 }
427 
// Read the next line into `record` and convert every column that does not
// belong to the tuple into `extra`.
//
// Returns false if no further record is available; `extra` is not modified
// in that case.
428 template<char Delimiter, typename NamedTuple>
429 template<typename T>
430 inline bool
// NOTE(review): listing line 431 (the qualified function name that opens the
// parameter list) is absent here.
432  NamedTuple& record, std::vector<T>& extra) {
433  // parse columns belonging to the regular record
434  if (not read(record)) {
435  return false;
436  }
437  // parse extra columns
// `extra` always ends up with exactly one entry per extra column
438  extra.resize(m_extra_columns.size());
439  for (std::size_t i = 0; i < m_extra_columns.size(); ++i) {
440  parse(m_columns[m_extra_columns[i]], extra[i]);
441  }
442  return true;
443 }
444 
// Set up the column mapping without looking at the header: the expected
// number of columns is the tuple size and tuple element i maps to column i.
445 template<char Delimiter, typename NamedTuple>
446 inline void
// NOTE(review): listing line 447 (the qualified function name and its
// parameter list) is absent here.
448  // assume row content is identical in content and order to the tuple
449  m_num_columns = std::tuple_size<Tuple>::value;
450  for (std::size_t i = 0; i < m_tuple_column_map.size(); ++i) {
451  m_tuple_column_map[i] = i;
452  }
453  // no extra columns by construction
454  m_extra_columns.clear();
455 }
456 
// Derive the column mapping from the header row held in `m_columns`.
//
// Throws std::runtime_error if a tuple element that is not listed in
// `optional_columns` has no column of the same name in the header.
457 template<char Delimiter, typename NamedTuple>
458 inline void
// NOTE(review): listing line 459 (the qualified function name that opens the
// parameter list) is absent here.
460  const std::vector<std::string>& optional_columns) {
461  const auto& names = NamedTuple::names();
462 
463  // the number of header columns fixes the expected number of data columns
464  m_num_columns = m_columns.size();
465 
466  // check that all non-optional columns are available
467  for (const auto& name : names) {
468  // no need to check for availability if the column is optional
469  auto o = std::find(optional_columns.begin(), optional_columns.end(), name);
470  if (o != optional_columns.end()) {
471  continue;
472  }
473  // missing, non-optional column means we can not continue
474  auto c = std::find(m_columns.begin(), m_columns.end(), name);
475  if (c == m_columns.end()) {
476  throw std::runtime_error("Missing header column '" + name + "'");
477  }
478  }
479 
480  // ensure missing columns are correctly marked as such
481  m_tuple_column_map.fill(SIZE_MAX);
482 
483  // determine column-tuple mapping and extra column indices
484  m_extra_columns.clear();
485  for (std::size_t i = 0; i < m_columns.size(); ++i) {
486  // find the position of the column in the tuple.
487  auto it = std::find(names.begin(), names.end(), m_columns[i]);
488  if (it != names.end()) {
489  // establish mapping between column and tuple item position
// if a name occurs more than once in the header, the last occurrence wins
490  m_tuple_column_map[std::distance(names.begin(), it)] = i;
491  } else {
492  // record non-tuple columns
493  m_extra_columns.push_back(i);
494  }
495  }
496 }
497 
498 } // namespace io_dsv_impl
499 
502 
505 
507 template<typename T>
509 
511 template<typename T>
513 
515 template<typename T>
517 
519 template<typename T>
521 
522 } // namespace dfe