EIC Software
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
EICAnalysisTools.py
Go to the documentation of this file. Or view the newest version in sPHENIX GitHub for file EICAnalysisTools.py
1 import sys
2 import pandas as pd
3 import numpy as np
4 import uproot
5 import scipy
6 import math
7 from uproot_methods import *
8 from concurrent.futures import ThreadPoolExecutor
9 
10 
11 # Define useful variables
12 
13 ## Units
14 
15 global u_fb,u_mb,u_pb
16 
17 u_fb = 1
18 u_mb = 1e12*u_fb
19 u_pb = 1e3*u_fb
20 
21 
22 
23 def UprootLoad(files=[], treename="", branches=[]):
24  print(f"::UprootLoad({files}")
25  executor = ThreadPoolExecutor(8)
26 
27  df_list = []
28  data = uproot.pandas.iterate(files,
29  treename,
30  branches=branches,
31  flatten=False,
32  executor=executor)
33  for dataframe in data:
34  df_list.append(dataframe)
35 
36  df = pd.concat(df_list)
37 
38  return df
39 
40 def TotalXSection(process='CC_DIS_e18_p275'):
41  global u_mb
42  if process == 'CC_DIS_e18_p275':
43  return 2.408e-08*u_mb
44  elif process == 'CC_DIS_e10_p100_CT18NNLO':
45  return 5.44044e-09*u_mb
46  elif process == 'CC_DIS_e10_p275_CT18NNLO':
47  return 1.47637e-08*u_mb
48  elif process == 'CC_DIS_e10_p275_CT1820Rs2':
49  return 1.47637e-08*u_mb
50  elif process == 'CC_DIS_e10_p100_CT1820Rs2':
51  return 5.44842e-09*u_mb
52  elif process == 'CC_DIS_e10_p275_CT1821Rs2':
53  return 1.45884e-08*u_mb
54  elif process == 'CC_DIS_e10_p100_CT1821Rs2':
55  return 5.36006e-09*u_mb
56  elif process == 'NC_DIS_e18_p275':
57  return 3.671e-06*u_mb
58 
59  print(f"EICAnalysisTools::TotalXSection == Unable to find requested process, {process}.")
60  # Don't continue. This is bad. Nonsense will result. The user should fix this.
61  sys.exit(-1)
62  return 0.0
63 
64 
65 # Compute auxiliary information for a DataFrame
66 def DISBjorkenX(row):
67  """
68  pProton = branchParticle.At(0).P4(); #these numbers 0 , 3, 5 are hardcoded in Pythia8
69  pleptonIn = branchParticle.At(3).P4();
70  pleptonOut = branchParticle.At(5).P4();
71  pPhoton = pleptonIn - pleptonOut;
72 
73  #Q2, W2, Bjorken x, y, nu.
74  Q2 = -pPhoton.M2()
75  W2 = (pProton + pPhoton).M2()
76  x = Q2 / (2. * pProton.Dot(pPhoton))
77  y = (pProton.Dot(pPhoton)) / (pProton.Dot(pleptonIn))
78  """
79 
80  particles_px = row['Particle.Px']
81  particles_py = row['Particle.Py']
82  particles_pz = row['Particle.Pz']
83  particles_E = row['Particle.E']
84 
85  # Beam Particle 4-vectors
86  iproton = 0
87  ilepton1 = 3
88  ilepton2 = 5
89 
90  pProton = TLorentzVector(particles_px[iproton], particles_py[iproton], particles_pz[iproton], particles_E[iproton])
91  pleptonIn = TLorentzVector(particles_px[ilepton1], particles_py[ilepton1], particles_pz[ilepton1], particles_E[ilepton1])
92  pleptonOut = TLorentzVector(particles_px[ilepton2], particles_py[ilepton2], particles_pz[ilepton2], particles_E[ilepton2])
93  pPhoton = pleptonIn - pleptonOut
94 
95  Q2 = -pPhoton.mag2
96  W2 = (pProton + pPhoton).mag2
97  x = Q2 / (2. * pProton.dot(pPhoton))
98 
99  # Write this value of x for each jet, since we need to do jet-level plots
100  x_array = np.ones(len(row['Jet.PT'])) * x
101 
102  return x_array
103 
104 
105 
106 
107 ###################################################################
108 # Differential Computation Functions
109 ###################################################################
110 
111 # Flavour tagging study code
112 def DifferentialTaggingYield(df, x='Jet.PT', xrange=[10,50], xbins=[10,12.5,15,20,25,30,40,50], which='all', process='CC_DIS_e18_p275', target_lumi = 100):
113  global u_fb
114  n_gen = len(df)
115  print(f"n_gen = {n_gen}")
116  jet_pt = np.concatenate(df['Jet.PT'].to_numpy()).ravel()
117  jet_eta = np.concatenate(df['Jet.Eta'].to_numpy()).ravel()
118  jet_flavor = np.concatenate(df['Jet.Flavor'].to_numpy()).ravel()
119  jet_tag = np.concatenate(df['Jet.BTag'].to_numpy()).ravel()
120 
121  jet_x = np.concatenate(df[x].to_numpy()).ravel()
122 
123 
124  #jet_basics = (0.01 < jet_eta) & (jet_eta < 0.9)
125  jet_basics = (jet_tag == 1)
126 
127  all_flavor = ( jet_flavor > -999.0 ) & (jet_basics)
128  light_flavor = (( jet_flavor < 4 ) | ( jet_flavor == 21 )) & (jet_basics)
129  charm_flavor = ( jet_flavor == 4 ) & (jet_basics)
130 
131  selection = all_flavor
132  if which == 'charm':
133  selection = charm_flavor
134  elif which == 'light':
135  selection = light_flavor
136 
137  (counts, bins) = np.histogram(jet_x[ selection ], range=xrange,
138  bins=xbins)
139  #bins=40)
140 
141  bin_widths = np.diff(bins)
142  bin_centers = bins[:-1] + bin_widths/2
143 
144  errors = np.sqrt(counts)
145  rel_errors = errors/counts
146 
147 
148  # convert counts to dsigma/dpT * 100/fb
149  dsigma_dx = counts * TotalXSection(process) * target_lumi*u_fb**(-1) / n_gen #/ bin_widths
150  dsigma_dx_errors = rel_errors * dsigma_dx
151 
152 
153  print(f"========= Validation Information from DifferentialTaggingYield ========= \n \
154  \n \
155  Process considered: {process} \n \
156  Theory Cross-Section: {TotalXSection(process)} \n \
157  Target Integrated Luminosity: {target_lumi} \n \
158  Total Predicted Yield of Events: {np.sum(dsigma_dx)} \ \
159  \n \
160  ========= Validation Information from DifferentialTaggingYield =========")
161 
162 
163  return (bin_centers, bin_widths, dsigma_dx, dsigma_dx_errors)
164 
165 
166 
167 def DifferentialTaggingEfficiency(df, x='Jet.PT', xrange=[10,50], xbins=[10,12.5,15,20,25,30,40,50], which='all'):
168  jet_pt = np.concatenate(df['Jet.PT'].to_numpy()).ravel()
169  jet_eta = np.concatenate(df['Jet.Eta'].to_numpy()).ravel()
170  jet_flavor = np.concatenate(df['Jet.Flavor'].to_numpy()).ravel()
171  jet_tag = np.concatenate(df['Jet.BTag'].to_numpy()).ravel()
172 
173  jet_x = np.concatenate(df[x].to_numpy()).ravel()
174 
175 
176  jet_tagged = (jet_tag == 1)
177 
178  all_flavor = ( jet_flavor > -999.0 )
179  light_flavor = ( jet_flavor < 4 ) | ( jet_flavor == 21 )
180  charm_flavor = ( jet_flavor == 4 )
181 
182  selection = all_flavor
183  if which == 'charm':
184  selection = charm_flavor
185  elif which == 'light':
186  selection = light_flavor
187 
188  (tru_counts, bins) = np.histogram(jet_x[ selection ], range=xrange,
189  bins=xbins)
190 
191  (tag_counts, bins) = np.histogram(jet_x[ (selection) & (jet_tagged) ], range=xrange,
192  bins=xbins)
193 
194 
195  eff = tag_counts/tru_counts
196  n_err = scipy.stats.binom.interval(1.0-math.exp(-1), tru_counts, eff)
197 
198  eff_err=[np.fabs(n_err[0]/tru_counts-eff), np.fabs(n_err[1]/tru_counts-eff)]
199 
200 
201  bin_widths = np.diff(bins)
202  bin_centers = bins[:-1] + bin_widths/2
203 
204  print(f"========= Validation Information from DifferentialTaggingEfficiency =========")
205  print(eff)
206  print(eff_err)
207  print(f"Tagging Efficiency above X threshold for {which}:")
208  for ibin in np.arange(len(bins)-1):
209  #mean_eff = np.nanmean(eff[ibin:])
210  #avg_errors = (eff_err[0]+eff_err[1])/2.0
211  #mean_err = np.sqrt(np.sum((avg_errors[ibin:]/eff[ibin:])**2))*mean_eff
212  n_tag = np.sum(tag_counts[ibin:])
213  n_tru = np.sum(tru_counts[ibin:])
214  eff_above_x = n_tag/n_tru
215  err_above_x = np.nanmean([np.fabs(np.sum(n_err[0][ibin:])/n_tru-eff_above_x), np.fabs(np.sum(n_err[1][ibin:])/n_tru-eff_above_x)])
216  #print(f"Minimum X: {bins[ibin]:.1f} Tag. Efficiency: ({mean_eff*100:.3f} +/- {mean_err*100:.3f})%")
217  print(f"Minimum X: {bins[ibin]:.1f} Tag. Efficiency: ({eff_above_x*100:.3f} +/- {err_above_x*100:.3f})%")
218 
219 
220  print(f"========= Validation Information from DifferentialTaggingEfficiency =========")
221 
222  return (bin_centers, bin_widths, eff, eff_err)
223 
224