EIC Software
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
EICAnalysisTools.py
Go to the documentation of this file. Or view the newest version in sPHENIX GitHub for file EICAnalysisTools.py
1 import sys
2 import pandas as pd
3 import numpy as np
4 import uproot
5 import scipy
6 import math
7 import glob
8 from uproot_methods import *
9 from concurrent.futures import ThreadPoolExecutor
10 
11 
12 # Define useful variables
13 
14 ## Units
15 
16 global u_fb,u_mb,u_pb
17 
18 u_fb = 1
19 u_mb = 1e12*u_fb
20 u_pb = 1e3*u_fb
21 
22 global n_gen
23 n_gen = 10e6
24 
25 
26 
27 def UprootLoad(files=[], treename="", branches=[]):
28  global n_gen
29  print(f"::UprootLoad({files}")
30  executor = ThreadPoolExecutor(8)
31 
32  #df = uproot.lazyarrays(files,
33  # treename,
34  # branches=branches,
35  # entrysteps=1000)
36 
37  filenames = glob.glob(files[0])
38 
39  data = pd.DataFrame(columns=branches)
40 
41  for filename in filenames:
42  tree = uproot.open(filename)[treename]
43  data = pd.concat([data, tree.arrays(branches, namedecode='utf-8', outputtype=pd.DataFrame)])
44 
45  n_gen = len(data) #uproot.numentries(files, treename, total=True)
46 
47  print(f"Originally generated number of collisions: {n_gen}")
48 
49  print("Exploding the DataFrame...")
50  #data = data.set_index(['bjorken_x', 'jb_x', 'jb_Q2']).apply(pd.Series.explode).reset_index()
51  #data = data.apply(pd.Series.explode)
52  print("Completed exploding the DataFrame")
53 
54  return data
55 
56 def TotalXSection(process='CC_DIS_e18_p275'):
57  global u_mb
58  if process == 'CC_DIS_e18_p275':
59  return 2.408e-08*u_mb
60  elif process == 'CC_DIS_e10_p100_CT18NNLO':
61  return 5.44044e-09*u_mb
62  elif process == 'CC_DIS_e10_p275_CT18NNLO':
63  return 1.47637e-08*u_mb
64  elif process == 'CC_DIS_e10_p275_CT1820Rs2':
65  return 1.47637e-08*u_mb
66  elif process == 'CC_DIS_e10_p100_CT1820Rs2':
67  return 5.44842e-09*u_mb
68  elif process == 'CC_DIS_e10_p275_CT1821Rs2':
69  return 1.45884e-08*u_mb
70  elif process == 'CC_DIS_e10_p100_CT1821Rs2':
71  return 5.36006e-09*u_mb
72  elif process == 'NC_DIS_e18_p275':
73  return 3.671e-06*u_mb
74 
75  print(f"EICAnalysisTools::TotalXSection == Unable to find requested process, {process}.")
76  # Don't continue. This is bad. Nonsense will result. The user should fix this.
77  sys.exit(-1)
78  return 0.0
79 
80 
81 # Compute auxiliary information for a DataFrame
82 def DISBjorkenX(row):
83  """
84  pProton = branchParticle.At(0).P4(); #these numbers 0 , 3, 5 are hardcoded in Pythia8
85  pleptonIn = branchParticle.At(3).P4();
86  pleptonOut = branchParticle.At(5).P4();
87  pPhoton = pleptonIn - pleptonOut;
88 
89  #Q2, W2, Bjorken x, y, nu.
90  Q2 = -pPhoton.M2()
91  W2 = (pProton + pPhoton).M2()
92  x = Q2 / (2. * pProton.Dot(pPhoton))
93  y = (pProton.Dot(pPhoton)) / (pProton.Dot(pleptonIn))
94  """
95 
96  particles_px = row['Particle.Px']
97  particles_py = row['Particle.Py']
98  particles_pz = row['Particle.Pz']
99  particles_E = row['Particle.E']
100 
101  # Beam Particle 4-vectors
102  iproton = 0
103  ilepton1 = 3
104  ilepton2 = 5
105 
106  pProton = TLorentzVector(particles_px[iproton], particles_py[iproton], particles_pz[iproton], particles_E[iproton])
107  pleptonIn = TLorentzVector(particles_px[ilepton1], particles_py[ilepton1], particles_pz[ilepton1], particles_E[ilepton1])
108  pleptonOut = TLorentzVector(particles_px[ilepton2], particles_py[ilepton2], particles_pz[ilepton2], particles_E[ilepton2])
109  pPhoton = pleptonIn - pleptonOut
110 
111  Q2 = -pPhoton.mag2
112  W2 = (pProton + pPhoton).mag2
113  x = Q2 / (2. * pProton.dot(pPhoton))
114 
115  # Write this value of x for each jet, since we need to do jet-level plots
116  x_array = np.ones(len(row['jet_pt'])) * x
117 
118  return x_array
119 
120 
121 
122 
123 ###################################################################
124 # Differential Computation Functions
125 ###################################################################
126 
127 # Flavour tagging study code
128 def DifferentialTaggingYield(df, x='jet_pt', xrange=[10,50], xbins=[10,12.5,15,20,25,30,40,50], which='all', process='CC_DIS_e18_p275', target_lumi = 100, taggers="jet_sip3dtag"):
129  global u_fb, n_gen
130  print(f"n_gen = {n_gen}")
131 
132  jet_flavor = np.concatenate(df['jet_flavor'].to_numpy()).ravel()
133  jet_tag = np.concatenate(df['jet_sip3dtag'].to_numpy()).ravel()
134 
135  jet_x = np.concatenate(df[x].to_numpy()).ravel()
136 
137  #jet_flavor = df['jet_flavor']
138  #jet_tag = df['jet_sip3dtag']
139 
140  #jet_x = df[x]
141 
142  if which == 'charm':
143  jet_x = jet_x[ (jet_tag == 1) & (jet_flavor == 4) ]
144  elif which == 'light':
145  jet_x = jet_x[ (jet_tag == 1) & ((jet_flavor < 4) | (jet_flavor == 21)) ]
146 
147 
148  (counts, bins) = np.histogram(jet_x, range=xrange,
149  bins=xbins)
150  #bins=40)
151 
152  bin_widths = np.diff(bins)
153  bin_centers = bins[:-1] + bin_widths/2
154 
155  errors = np.sqrt(counts)
156  rel_errors = errors/counts
157 
158 
159  # convert counts to dsigma/dpT * 100/fb
160  dsigma_dx = counts * TotalXSection(process) * target_lumi*u_fb**(-1) / n_gen #/ bin_widths
161  dsigma_dx_errors = rel_errors * dsigma_dx
162 
163 
164  print(f"========= Validation Information from DifferentialTaggingYield ========= \n \
165  \n \
166  Process considered: {process} \n \
167  Theory Cross-Section: {TotalXSection(process)} \n \
168  Target Integrated Luminosity: {target_lumi} \n \
169  Total Predicted Yield of Events: {np.sum(dsigma_dx)} \ \
170  \n \
171  ========= Validation Information from DifferentialTaggingYield =========")
172 
173 
174  return (bin_centers, bin_widths, dsigma_dx, dsigma_dx_errors)
175 
176 
177 
178 def DifferentialTaggingEfficiency(df, x='jet_pt', xrange=[10,50], xbins=[10,12.5,15,20,25,30,40,50], which='all', taggers="jet_sip3dtag"):
179 
180  jet_flavor = np.concatenate(df['jet_flavor'].to_numpy()).ravel()
181  jet_tag = np.concatenate(df['jet_sip3dtag'].to_numpy()).ravel()
182 
183  jet_x = np.concatenate(df[x].to_numpy()).ravel()
184 
185  # too slow
186  #if len(jet_x.flatten()) != len(jet_flavor.flatten()):
187  # print("Hi!")
188  # print(len(jet_x))
189  # new_jet_x = np.array([])
190  # for i in np.arange(len(jet_x)):
191  # if len(jet_flavor[i]) > 1:
192  # print(len(jet_flavor[i]), " ", np.ones(len(jet_flavor[i])), " ", np.ones(len(jet_flavor[i]))*jet_x[i])
193  # new_jet_x = np.append(new_jet_x, np.ones(len(jet_flavor[i]))*jet_x[i])
194  #print(new_jet_x[:15])
195 
196  if which == 'charm':
197  jet_x = jet_x[ jet_flavor == 4 ]
198  jet_tag = jet_tag[ jet_flavor == 4 ]
199  elif which == 'light':
200  jet_x = jet_x[ ((jet_flavor < 4) | (jet_flavor == 21)) ]
201  jet_tag = jet_tag[ ((jet_flavor < 4) | (jet_flavor == 21)) ]
202 
203  (tru_counts, bins) = np.histogram(jet_x, range=xrange,
204  bins=xbins)
205 
206  (tag_counts, bins) = np.histogram(jet_x[ jet_tag == 1 ], range=xrange,
207  bins=xbins)
208 
209 
210  eff = tag_counts/tru_counts
211  n_err = scipy.stats.binom.interval(1.0-math.exp(-1), tru_counts, eff)
212 
213  eff_err=[np.fabs(n_err[0]/tru_counts-eff), np.fabs(n_err[1]/tru_counts-eff)]
214 
215 
216  bin_widths = np.diff(bins)
217  bin_centers = bins[:-1] + bin_widths/2
218 
219  print(f"========= Validation Information from DifferentialTaggingEfficiency =========")
220  print(eff)
221  print(eff_err)
222  print(f"Tagging Efficiency above X threshold for {which}:")
223  for ibin in np.arange(len(bins)-1):
224  #mean_eff = np.nanmean(eff[ibin:])
225  #avg_errors = (eff_err[0]+eff_err[1])/2.0
226  #mean_err = np.sqrt(np.sum((avg_errors[ibin:]/eff[ibin:])**2))*mean_eff
227  n_tag = np.sum(tag_counts[ibin:])
228  n_tru = np.sum(tru_counts[ibin:])
229  eff_above_x = n_tag/n_tru
230  err_above_x = np.nanmean([np.fabs(np.sum(n_err[0][ibin:])/n_tru-eff_above_x), np.fabs(np.sum(n_err[1][ibin:])/n_tru-eff_above_x)])
231  #print(f"Minimum X: {bins[ibin]:.1f} Tag. Efficiency: ({mean_eff*100:.3f} +/- {mean_err*100:.3f})%")
232  print(f"Minimum X: {bins[ibin]:.1f} Tag. Efficiency: ({eff_above_x*100:.3f} +/- {err_above_x*100:.3f})%")
233 
234 
235  print(f"========= Validation Information from DifferentialTaggingEfficiency =========")
236 
237  return (bin_centers, bin_widths, eff, eff_err)
238 
239