EIC Software
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
build.py
Go to the documentation of this file. Or view the newest version in sPHENIX GitHub for file build.py
1 #!/usr/bin/env python
2 
3 """Script for building ROOT trees.
4 
5 Thomas Burton, BNL, 5th September 2013, tpb@bnl.gov
6 
7 For usage run
8  build.py --help
9 
10 Prerequisites:
11 
12 -- ROOT installed with Python support (active by default for recent versions)
13 -- ROOT.py should be accessible via $PYTHONPATH
14 -- libeicsmear should be accessible via $LD_LIBRARY_PATH
15 
16 If in doubt, adding the ROOT library directory (e.g. $ROOTSYS/lib)
17 to both LD_LIBRARY_PATH and PYTHONPATH should suffice.
18 
19 """
20 
21 import os
22 import Queue
23 
# Map each supported compressed-file extension to the name of the external
# command that compresses (ZIP) or decompresses (UNZIP) files of that type.
ZIP = {
    '.gz': 'gzip',
    '.bz2': 'bzip2',
}
UNZIP = {
    '.gz': 'gunzip',
    '.bz2': 'bunzip2',
}
27 
class File(object):
    """Processes an input file into a ROOT tree file.

    Each instance describes one input file: its name split into base name,
    data extension and (optional) zip extension, plus the output directory
    and event limit to pass to ROOT.BuildTree.

    """

    # Use Queue to manage threading.
    # This Queue stores all File objects waiting to be processed.
    queue = Queue.Queue()

    # Allowed input file extensions (checked after any zip extension has
    # been stripped). Files with any other extension are skipped.
    supported_extensions = {'.txt', '.dat'}

    def __init__(self, filename, outdir, nevents, rezip):
        """Constructor.

        Initialise with the input file and output information.
        Determines whether the input file is zipped.

        Arguments:
        filename -- name of the input file, optionally ending in a zip
                    extension listed in ZIP
        outdir -- directory passed to ROOT.BuildTree for the output
        nevents -- event limit passed to ROOT.BuildTree
        rezip -- whether to rezip the file after processing; ignored
                 (stored as False) if the file is not zipped

        """
        name, extension = os.path.splitext(filename)
        # self.zipext stores the zipped extension if the file was zipped
        # initially, or None if it was not zipped.
        if extension in ZIP:
            self.zipext = extension
            self.rezip = rezip
            # Get the name and extension now that the zip extension is gone
            name, extension = os.path.splitext(name)
        else:
            self.zipext = None
            self.rezip = False
        self.name = name  # Without extension
        self.ext = extension  # File extension, not zipped extension
        self.outdir = outdir
        self.nevents = nevents

    def process(self):
        """Build the tree for the input the File was initialised with.

        If the file is zipped, unzip it first and rezip it after making
        the ROOT tree (unless requested not to rezip). Files whose
        extension is not in supported_extensions are silently skipped.

        Errors raised while building the tree are reported and swallowed
        so that one bad file does not stop the remaining files.

        """
        import subprocess
        if self.ext not in File.supported_extensions:
            return
        fullname = ''.join([self.name, self.ext])
        unzipped = False
        # Unzip a zipped file
        if self.zipext:
            zipped_name = ''.join([fullname, self.zipext])
            # subprocess.call should return 0 if unzipping is successful
            unzipped = not subprocess.call(
                [UNZIP[self.zipext], '-v', zipped_name])
        # Catch any errors from tree building so we always rezip if asked to
        try:
            # Import here rather than relying on a module-level global:
            # ROOT is only imported at file level when run as a script, so
            # without this the name is undefined when used as a library.
            import ROOT
            # Here's what we actually came to do!
            ROOT.BuildTree(fullname, self.outdir, self.nevents)
        except Exception as error:
            print('Error encountered building tree: %s' % error)
        finally:
            # Only rezip what we unzipped ourselves. The finally clause
            # makes sure this happens even on KeyboardInterrupt/SystemExit.
            if unzipped and self.rezip:
                print('Rezipping %s' % fullname)
                subprocess.call([ZIP[self.zipext], '-v', fullname])
89 
90 
def processor():
    """Function run by each thread.

    Pops Files from the queue and processes them until there are no files
    remaining. Never returns: it blocks on the queue when it is empty, so
    it is meant to be run in a daemon thread.

    """
    while True:
        item = File.queue.get()  # Get the next file from the Queue
        try:
            item.process()  # Process the file
        except Exception as error:
            # Keep this worker alive: if it died, the files still in the
            # Queue would never be processed and Queue.join() would hang.
            print('Error processing file: %s' % error)
        finally:
            # Always inform the Queue that the file is done, even after a
            # failure, otherwise Queue.join() would block forever.
            File.queue.task_done()
102 
def build_list(filelist, outdir='.', nevents=-1, nthreads=1, rezip=True):
    """Build ROOT tree files from all the listed files.

    Arguments:
    filelist -- a list of file names
    outdir -- the directory to which to write ROOT files [current directory]
    nevents -- the maximum number of events per file to process [all]
    nthreads -- the maximum number of threads permitted to run [1];
                values below 1 are treated as 1
    rezip -- whether to rezip files that had to be unzipped [True]

    Zipped files (either gzip or bzip2) are unzipped then rezipped
    after the ROOT file has been made.

    """
    import threading
    # Generate our threads, each running the processing function.
    # At least one worker is always started: with none, nothing would ever
    # drain the Queue and the join() below would block forever.
    for _ in range(max(1, nthreads)):
        worker = threading.Thread(target=processor)
        # Daemon threads do not keep the interpreter alive once the
        # main thread has finished.
        worker.daemon = True
        worker.start()
    # Populate the Queue with Files
    for filename in filelist:
        File.queue.put(File(filename, outdir, nevents, rezip))
    # Wait for all the Queue elements to finish processing
    File.queue.join()
127 
def parse(args=None):
    """Process command line arguments and return argparse Namespace.

    Arguments:
    args -- optional list of argument strings to parse instead of the
            process command line [None, meaning use sys.argv[1:]]

    The returned Namespace has the attributes source, outdir, events,
    processes and norezip.

    """
    from argparse import ArgumentParser, ArgumentDefaultsHelpFormatter
    parser = ArgumentParser(
        formatter_class=ArgumentDefaultsHelpFormatter,
        description='build trees from files in a list or directory')
    parser.add_argument('source', help='name of file list or directory')
    parser.add_argument('-o', '--outdir', default='.', help='output directory')
    parser.add_argument('-e', '--events', type=int, default=-1,
                        help='number of events per file')
    parser.add_argument('-p', '--processes', type=int, default=1,
                        help='number of parallel processes')
    parser.add_argument('--norezip', action='store_true',
                        help='do not rezip files')
    # parse_args(None) falls back to sys.argv[1:], so existing callers that
    # pass nothing keep their behaviour.
    return parser.parse_args(args)
142 
# Execute build_list on all the files named in a list file, or on all the
# files in a directory, as given by the required 'source' argument.
if __name__ == "__main__":
    import sys
    # Parse command line options
    options = parse()
    # Import ROOT after parsing arguments so build.py --help will still
    # work even if ROOT cannot be located.
    try:
        import ROOT
        # Try to tell ROOT to ignore command line arguments.
        # Otherwise ROOT will intercept command line arguments and
        # build.py --help won't work. This isn't supported in older versions
        # of ROOT, so don't complain if it can't be done.
        try:
            ROOT.PyConfig.IgnoreCommandLineOptions = True
        except AttributeError:
            pass
        # Load libeicsmear, raise an error if it can't be found
        if ROOT.gSystem.Load('libeicsmear') < 0:
            raise IOError('libeicsmear could not be located')
    # If importing ROOT and libeicsmear failed print an error and quit
    except Exception as error:
        print('Error: %s' % error)
        # Exit with a non-zero status so callers can detect the failure
        sys.exit(1)
    # Try to get list of file names from an input file
    if os.path.isfile(options.source):
        with open(options.source) as listing:
            # Skip blank lines so they do not become empty file names
            files = [line for line in listing.read().splitlines()
                     if line.strip()]
    # Try to get list of file names from all files in a directory
    elif os.path.isdir(options.source):
        files = [os.path.join(options.source, entry)
                 for entry in os.listdir(options.source)]
    # Got some bad input, complain
    else:
        print('%s not recognized... quitting' % options.source)
        sys.exit(1)
    # Build everything
    build_list(files, options.outdir, options.events, options.processes,
               not options.norezip)