EIC Software
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
charm_jet_tagging_optimization_study.py
Go to the documentation of this file. Or view the newest version in sPHENIX GitHub for file charm_jet_tagging_optimization_study.py
1 import matplotlib as mpl
2 import uproot
3 import matplotlib.pyplot as plt
4 import scipy
5 import numpy as np
6 import math
7 import pandas as pd
8 import seaborn as sns
9 import mplhep as hep
10 #import zfit
11 import inspect
12 import sys
13 import argparse
14 import glob
15 
16 from concurrent.futures import ThreadPoolExecutor
17 
18 plt.style.use(hep.style.ATLAS)
19 
20 plt.rcParams.update({'font.sans-serif': "Arial",
21  'font.family': "sans-serif",
22  'font.size': 30,
23  'mathtext.fontset': 'custom',
24  'mathtext.rm': 'Arial',
25  })
26 
27 
28 import EICAnalysisTools as eat
29 
30 
31 # Parse arguments
32 parser = argparse.ArgumentParser()
33 
34 parser.add_argument("-i", "--input", type=str,
35  help="Main input subfolder")
36 
37 args = parser.parse_args()
38 
39 
40 # Grab all the CSV files
41 csvfiles = glob.glob(f"{args.input}/*/tagging_study.csv")
42 
43 print(csvfiles)
44 
45 
46 dataframes = []
47 
48 for csvfile in csvfiles:
49  dataframes.append(pd.read_csv(csvfile))
50 
51 
52 data = pd.concat(dataframes)
53 
54 data = data.groupby(["Variation"], as_index=False).sum()
55 
56 print(data.head(10))
57 
58 # Add the Punzi DOM
59 
60 DiscoverySignificance = 5.0
61 xsection = eat.TotalXSection('CC_DIS_e10_p275_CT18NNLO')
62 lumi = 100 #/fb
63 n_gen = len(dataframes)*2e5 # 200,000 collisions generated per file
64 
65 allLight = float(data[data["Variation"] == "all" ]["Light"])
66 allCharm = float(data[data["Variation"] == "all" ]["Charm"])
67 
68 
69 data["LightEff"] = data["Light"]/allLight
70 data["Light_100fb"] = xsection*lumi*data["Light"]/n_gen
71 data["CharmEff"] = data["Charm"]/allCharm
72 data["Charm_100fb"] = xsection*lumi*data["Charm"]/n_gen
73 
74 
75 
76 
77 data["PunziFOM"] = (data["Charm"]/allCharm)/(DiscoverySignificance/2.0 + np.sqrt(data["Light_100fb"]))
78 data["CharmErrFOM"] = data["Charm_100fb"]/np.sqrt(data["Charm_100fb"] + 2.0*data["Light_100fb"])
79 
80 pd.set_option('display.max_rows', data.shape[0]+1)
81 
82 print(data)
83 
84 
85 # Find the row with the max(PunziFOM)
86 
87 #FOM_choice = "PunziFOM"
88 FOM_choice = "CharmErrFOM"
89 
90 # Remove that "all" row before optimization printing
91 data = data[data["Variation"] != "all"]
92 
93 optimal_row = data[FOM_choice].idxmax()
94 
95 print(data.iloc[optimal_row])
96 print(f"Number of generated events: {n_gen}")
97 
98 # Hold light jet efficiency ~constant at some value and find the best operating point there.
99 
100 #print(data[ np.abs(data["LightEff"] - 2.0e-5)/1.0e-5 < 0.5 ])
101 
102 #lighteff_target = 4.00e-5
103 target = float(data.iloc[optimal_row][FOM_choice])
104 tolerance = 2
105 
106 print(f"Optimization of charm efficiency holding {FOM_choice} at ~{target}")
107 print(data[ np.abs(data[FOM_choice] - target) < tolerance ].head(20))
108 optimal_row = data[ np.abs(data[FOM_choice] - target) < tolerance ][FOM_choice].idxmax()
109 
110 print(data.iloc[optimal_row])
111