feat: add sum of weights for BTA_ttbar workflow, fix typo in suball script and make basepath in BTA producers configurable #111

Open — wants to merge 9 commits into base: master
4 changes: 2 additions & 2 deletions scripts/suball.py
@@ -58,8 +58,8 @@ def get_lumi_from_web(year):
if __name__ == "__main__":
parser = argparse.ArgumentParser(description="Mastering workflow submission")
parser = config_parser(parser)
paser = scaleout_parser(parser)
paser = debug_parser(parser)
parser = scaleout_parser(parser)
parser = debug_parser(parser)
parser.add_argument(
"-sc",
"--scheme",
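A note on the `paser` → `parser` fix above: if `scaleout_parser` and `debug_parser` mutate the parser they are given and return it, the misspelled assignments were only cosmetic; if they ever returned a fresh parser, the scale-out and debug options would have been silently dropped. Either way, the corrected names make the chaining explicit. A minimal sketch of the pattern, with hypothetical stand-ins for the real helpers:

```python
# Sketch only: scaleout_parser/debug_parser are stand-ins; the real helpers
# live in the BTVNanoCommissioning utilities and add many more options.
import argparse

def scaleout_parser(parser: argparse.ArgumentParser) -> argparse.ArgumentParser:
    parser.add_argument("--workers", type=int, default=4, help="number of workers")
    return parser  # mutated in place and handed back for chaining

def debug_parser(parser: argparse.ArgumentParser) -> argparse.ArgumentParser:
    parser.add_argument("--debug", action="store_true", help="verbose output")
    return parser

parser = argparse.ArgumentParser(description="Mastering workflow submission")
parser = scaleout_parser(parser)  # was: paser = scaleout_parser(parser)
parser = debug_parser(parser)     # was: paser = debug_parser(parser)
args = parser.parse_args(["--workers", "8", "--debug"])
```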

Large diffs are not rendered by default (two files).

3 changes: 2 additions & 1 deletion src/BTVNanoCommissioning/helpers/func.py
@@ -257,5 +257,6 @@ def uproot_writeable(events, include=["events", "run", "luminosityBlock"]):
b_nest[n] = ak.fill_none(
ak.packed(ak.without_parameters(events[bname][n])), -99
)
ev[bname] = ak.zip(b_nest)
if bool(b_nest):
ev[bname] = ak.zip(b_nest)
return ev
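The new `if bool(b_nest)` guard skips collections whose fields were all filtered out, so `ak.zip` is never called on an empty mapping (which would fail or produce a record with no fields). A small self-contained sketch of the behaviour, assuming the awkward-array semantics used elsewhere in the package:

```python
# Sketch of the guarded-zip pattern from uproot_writeable: only collections
# that still have at least one field are zipped into the output event dict.
import awkward as ak

collections = {
    "Jet": {
        "pt": ak.Array([[30.0, 25.0], [40.0]]),
        "eta": ak.Array([[0.1, -1.2], [2.0]]),
    },
    "Empty": {},  # e.g. a collection whose branches were all dropped
}

ev = {}
for bname, b_nest in collections.items():
    if bool(b_nest):  # skip empty mappings instead of passing them to ak.zip
        ev[bname] = ak.zip(b_nest)

print(list(ev))  # ['Jet'] -- the empty collection is simply omitted
```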
4 changes: 2 additions & 2 deletions src/BTVNanoCommissioning/utils/AK4_parameters.py
@@ -61,7 +61,7 @@
"Run2022D": "calibeHistoWrite_Data2022D_NANO130X_v1.root",
"MC": "calibeHistoWrite_MC2022_NANO130X_v2.root",
},
"jetveto": {"Run2022CD jetvetomap": "Winter22Run3_RunCD_v1.histo.root"},
"jetveto": {"Run2022CD jetvetomap": "Summer22_23Sep2023_RunCD_v1.root"},
},
"Summer22EE": {
"lumiMask": "Cert_Collisions2022_355100_362760_Golden.json",
@@ -80,7 +80,7 @@
"ele_Reco_med 2022Re-recoE+PromptFG Electron-ID-SF": "Reco20to75",
"ele_Reco_high 2022Re-recoE+PromptFG Electron-ID-SF": "RecoAbove75",
},
"jetveto": {"Run2022E jetvetomap_eep": "Winter22Run3_RunE_v1.histo.root"},
"jetveto": {"Run2022E jetvetomap_eep": "Summer22EE_23Sep2023_RunEFG_v1.root"},
# use for BTA production, jet probability
"JPCalib": {
"Run2022E": "calibeHistoWrite_Data2022F_NANO130X_v1.root",
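For orientation, the jet-veto entries above are plain file names keyed by a "<runs> jetvetomap…" label inside each campaign block. A hedged sketch of how such an entry might be resolved; the helper below is illustrative, not part of the package API:

```python
# Illustrative only: the nested keys mirror AK4_parameters.py, but
# jetveto_file() is a hypothetical helper, not the package's actual loader.
config = {
    "Summer22": {
        "jetveto": {"Run2022CD jetvetomap": "Summer22_23Sep2023_RunCD_v1.root"},
    },
    "Summer22EE": {
        "jetveto": {"Run2022E jetvetomap_eep": "Summer22EE_23Sep2023_RunEFG_v1.root"},
    },
}

def jetveto_file(campaign: str) -> str:
    # each campaign registers exactly one veto-map file in this sketch
    (fname,) = config[campaign]["jetveto"].values()
    return fname

print(jetveto_file("Summer22EE"))  # Summer22EE_23Sep2023_RunEFG_v1.root
```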
6 changes: 5 additions & 1 deletion src/BTVNanoCommissioning/utils/correction.py
@@ -422,7 +422,11 @@ def JME_shifts(
if k in events.metadata["dataset"]
]
if len(jecname) > 1:
raise ("Multiple uncertainties match to this era")
raise ValueError("Multiple uncertainties match to this era")
elif len(jecname) == 0:
raise ValueError(
"Available JEC variations in this era are not compatible with this file. Did you choose the correct dataset-era combination?"
)
else:
jecname = jecname[0] + "_DATA"
else:
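The new error handling distinguishes two failure modes when matching a data era to a JEC key: more than one key matching the dataset name (ambiguous) and none matching (wrong dataset/era combination). A minimal sketch of that matching logic, with a toy key registry standing in for the real correction configuration:

```python
# Toy version of the era-matching block in JME_shifts; the jec_config keys are
# invented here and only illustrate the substring-matching behaviour.
jec_config = {"Summer22_RunCD": "...", "Summer22EE_RunE": "...", "Summer22EE_RunF": "..."}

def resolve_jec_name(dataset: str) -> str:
    matches = [k for k in jec_config if k in dataset]
    if len(matches) > 1:
        raise ValueError("Multiple uncertainties match to this era")
    elif len(matches) == 0:
        raise ValueError(
            "Available JEC variations in this era are not compatible with this file. "
            "Did you choose the correct dataset-era combination?"
        )
    return matches[0] + "_DATA"

print(resolve_jec_name("JetMET_Summer22EE_RunE"))  # Summer22EE_RunE_DATA
```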
6 changes: 4 additions & 2 deletions src/BTVNanoCommissioning/workflows/BTA_producer.py
@@ -34,6 +34,7 @@ def __init__(
chunksize=75000,
addPFMuons=False, # BTA custom argument
addAllTracks=False, # BTA custom argument
base_file_path="root://eoscms.cern.ch//eos/cms/store/group/phys_btag/milee",
):
self._year = year
self._campaign = campaign
@@ -46,6 +47,7 @@ def __init__(
self.addPFMuons = addPFMuons
self.addAllTracks = addAllTracks
self.isSyst = isSyst
self.base_file_path = base_file_path

@property
def accumulator(self):
@@ -65,7 +67,7 @@ def process(self, events):
if self.isSyst:
fname = "systematic/" + fname
checkf = os.popen(
f"gfal-ls root://eoscms.cern.ch//eos/cms/store/group/phys_btag/milee/{dirname}/{self._campaign.replace('Run3','')}/{fname}"
f"gfal-ls {self.base_file_path}/{dirname}/{self._campaign.replace('Run3','')}/{fname}"
).read()
if len(checkf) > 0:
print("skip ", checkf)
@@ -1156,7 +1158,7 @@ def process_shift(self, events, shift_name):
output_root[bname] = ak.zip(b_nest)
fout["btagana/ttree"] = output_root
os.system(
f"xrdcp -p --silent {fname} root://eoscms.cern.ch//eos/cms/store/group/phys_btag/milee/{dirname}/{self._campaign.replace('Run3','')}/{fname}"
f"xrdcp -p --silent {fname} {self.base_file_path}/{dirname}/{self._campaign.replace('Run3','')}/{fname}"
)
os.system(f"rm {fname}")

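Making the EOS prefix a constructor argument means the pre-existence check (`gfal-ls`) and the upload (`xrdcp`) now point at the same configurable location, so the producer can be redirected to a private area without editing the workflow. A condensed sketch of the pattern, with illustrative path components:

```python
# Condensed sketch of the configurable-output pattern in BTA_producer.py;
# dirname/campaign/fname below are placeholders, the default base path is the
# one from the diff.
import os

base_file_path = "root://eoscms.cern.ch//eos/cms/store/group/phys_btag/milee"
dirname, campaign, fname = "BTA", "Summer22EE", "dataset/chunk_0.root"
remote = f"{base_file_path}/{dirname}/{campaign}/{fname}"

# skip chunks that already exist from a previous (partial) submission
if len(os.popen(f"gfal-ls {remote}").read()) > 0:
    print("skip", remote)
else:
    # ... write the local file `fname` with uproot, then ship it off and clean up
    os.system(f"xrdcp -p --silent {fname} {remote}")
    os.system(f"rm {fname}")
```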
118 changes: 91 additions & 27 deletions src/BTVNanoCommissioning/workflows/BTA_ttbar_producer.py
@@ -24,6 +24,8 @@ def __init__(
isArray=True,
noHist=False,
chunksize=75000,
base_file_path="root://eoscms.cern.ch//eos/cms/store/group/phys_btag/milee",
output_dir="/tmp/{user}/phys_btag/btv_nano_commissioning",
):
self._year = year
self._campaign = campaign
@@ -37,6 +39,8 @@
# note: in BTA TTbarSelectionProducer it says "disable trigger selection in MC"
# for consistency, we will disable both trigger selection in MC and data here
self.do_trig_sel = False
self.base_file_path = base_file_path
self.output_dir = output_dir.format(user=os.environ["USER"]) if "{user}" in output_dir else output_dir

@property
def accumulator(self):
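The `{user}` placeholder in the default `output_dir` is expanded from the `USER` environment variable, so per-user scratch directories under `/tmp` work out of the box. A minimal sketch of that expansion (assuming `USER` is set, as it is on lxplus and batch nodes):

```python
# Sketch of the output_dir placeholder handling added to __init__.
import os

output_dir = "/tmp/{user}/phys_btag/btv_nano_commissioning"  # default from the diff
resolved = (
    output_dir.format(user=os.environ["USER"])
    if "{user}" in output_dir
    else output_dir
)
print(resolved)  # e.g. /tmp/<your username>/phys_btag/btv_nano_commissioning
```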
@@ -65,10 +69,10 @@ def process_shift(self, events, shift_name):
dataset = events.metadata["dataset"]

fname = f"{dataset}_{shift_name}/{events.metadata['filename'].split('/')[-1].replace('.root','')}_{int(events.metadata['entrystop']/self.chunksize)}.root"
outfile_path = f"{self.base_file_path}/BTA_ttbar/{self._campaign.replace('Run3','')}/{fname}"

checkf = os.popen(
f"gfal-ls root://eoscms.cern.ch//eos/cms/store/group/phys_btag/milee/BTA_ttbar/{self._campaign.replace('Run3','')}/{fname}"
).read()
gfal_ls_command = f"gfal-ls {outfile_path}"
checkf = os.popen(gfal_ls_command).read()
if len(checkf) > 0:
return {dataset: len(events)}

@@ -380,8 +384,13 @@ def process_shift(self, events, shift_name):
# store weights according to BTA code
# note: in the original BTA code, all weights are appended to the same variable ttbar_w. Here we separate them following the way they are split in NanoAOD
basic_vars["ttbar_w"] = events.genWeight
# LHE pdf variation weights (w_var / w_nominal) for LHA IDs 325300 - 325402
lhe_pdf_w_arrays = getattr(events, "LHEPdfWeight", None)
# [0] is renscfact=0.5d0 facscfact=0.5d0 ; [1] is renscfact=0.5d0 facscfact=1d0 ; [2] is renscfact=0.5d0 facscfact=2d0 ; [3] is renscfact=1d0 facscfact=0.5d0 ;
# [4] is renscfact=1d0 facscfact=1d0 ; [5] is renscfact=1d0 facscfact=2d0 ; [6] is renscfact=2d0 facscfact=0.5d0 ; [7] is renscfact=2d0 facscfact=1d0 ;
# [8] is renscfact=2d0 facscfact=2d0
lhe_scale_w_arrays = getattr(events, "LHEScaleWeight", None)
# [0] is ISR=2 FSR=1; [1] is ISR=1 FSR=2; [2] is ISR=0.5 FSR=1; [3] is ISR=1 FSR=0.5;
ps_w_arrays = getattr(events, "PSWeight", None)
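Reading the weight branches with `getattr(..., None)` lets samples that lack `LHEPdfWeight`, `LHEScaleWeight` or `PSWeight` (and data) pass through; the `None` sentinel is what the bookkeeping below keys on when it falls back to zero-filled counters. A small sketch of that behaviour with a fake events object:

```python
# Sketch only: FakeEvents stands in for a NanoEvents chunk; the sample here
# carries PSWeight but no LHEScaleWeight branch.
import awkward as ak

class FakeEvents:
    genWeight = ak.Array([1.0, -1.0, 1.0])
    PSWeight = ak.Array([[1.0, 0.9, 1.1, 1.0]] * 3)
    # no LHEScaleWeight attribute on this sample

events = FakeEvents()
lhe_scale_w_arrays = getattr(events, "LHEScaleWeight", None)  # -> None
ps_w_arrays = getattr(events, "PSWeight", None)               # -> jagged Array

print(lhe_scale_w_arrays is None, len(ps_w_arrays[0]))  # True 4
```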

###############
Expand Down Expand Up @@ -515,7 +524,10 @@ def process_shift(self, events, shift_name):
passJetSel = (
((abs(chsel) == 11) | (abs(chsel) == 13)) & (ak.num(Jet_clean) >= 4)
) | ((abs(chsel) > 13) & (ak.num(Jet_clean) >= 1))
passMetSel = events.PuppiMET.pt > 0
if "Run3" in self._campaign:
passMetSel = events.PuppiMET.pt > 0
else:
passMetSel = events.MET.pt > 0
Review comment on lines +527 to +530 (Collaborator):

Apologies, I had not noticed that the implementation uses JME_shifts.
In that function we already apply the JEC to PuppiMET and store the result under the MET branch.

If you read the PuppiMET branch directly, the JEC correction may not be propagated correctly.


# and the channel selection, configured in TTbarSelectionFilter
# https://github.com/cms-btv-pog/RecoBTag-PerformanceMeasurements/blob/10_6_X/python/TTbarSelectionFilter_cfi.py#L4
@@ -527,25 +539,55 @@
###############
# Write root #
###############
fname = f"{dataset}_{shift_name}/{events.metadata['filename'].split('/')[-1].replace('.root','')}_{int(events.metadata['entrystop']/self.chunksize)}.root"
os.system(f"mkdir -p {dataset}_{shift_name}")
local_outdir = f"{self.output_dir}/BTA_ttbar/{self._campaign.replace('Run3','')}"
# fname is composed of the {dataset}_{shift_name}/ prefix and the chunk file name
os.system(f"mkdir -p {local_outdir}/{dataset}_{shift_name}")
local_outfile_path = f"{local_outdir}/{fname}"
# no events pass selection, just write bookkeeping metadata to file
if ak.any(passEvent) == False:
with uproot.recreate(fname) as fout:
fout["sumw"] = {
"total_events": ak.Array([len(events)]),
}
if not isRealData:
fout["total_pos_events"] = ak.Array([ak.sum(events.genWeight > 0)])
fout["total_neg_events"] = ak.Array(
[-1.0 * ak.sum(events.genWeight < 0)]
)
fout["total_wei_events"] = ak.Array([ak.sum(events.genWeight)])
fout["total_poswei_events"] = ak.Array(
[ak.sum(events.genWeight[events.genWeight > 0.0])]
)
fout["total_negwei_events"] = ak.Array(
[ak.sum(events.genWeight[events.genWeight < 0.0])]
)
with uproot.recreate(local_outfile_path) as fout:
if isRealData:
fout["sumw"] = {"total_events": ak.Array([len(events)])}
else:
# add counters for systematic variations of LHE and parton-shower weights
# (this is not pretty, but uproot wants a fixed branch structure)
try:
number_lhe_scaleweights = len(lhe_scale_w_arrays[0])
except (TypeError, IndexError):
number_lhe_scaleweights = 0
try:
number_of_psweights = len(ps_w_arrays[0])
except (TypeError, IndexError):
number_of_psweights = 0
fout["sumw"] = {
"total_events": ak.Array([len(events)]),
"total_pos_events": ak.Array([ak.sum(events.genWeight > 0)]),
"total_neg_events": ak.Array([-1.0 * ak.sum(events.genWeight < 0)]),
"total_wei_events": ak.Array([ak.sum(events.genWeight)]),
"total_poswei_events": ak.Array(
[ak.sum(events.genWeight[events.genWeight > 0.0])]
),
"total_negwei_events": ak.Array(
[ak.sum(events.genWeight[events.genWeight < 0.0])]
),
"total_lhe_scaleweights_0": ak.Array([ak.sum(lhe_scale_w_arrays[:, 0] * events.genWeight)]) if lhe_pdf_w_arrays is not None and number_lhe_scaleweights > 0 else ak.Array([0.]),
"total_lhe_scaleweights_1": ak.Array([ak.sum(lhe_scale_w_arrays[:, 1] * events.genWeight)]) if lhe_pdf_w_arrays is not None and number_lhe_scaleweights > 1 else ak.Array([0.]),
"total_lhe_scaleweights_2": ak.Array([ak.sum(lhe_scale_w_arrays[:, 2] * events.genWeight)]) if lhe_pdf_w_arrays is not None and number_lhe_scaleweights > 2 else ak.Array([0.]),
"total_lhe_scaleweights_3": ak.Array([ak.sum(lhe_scale_w_arrays[:, 3] * events.genWeight)]) if lhe_pdf_w_arrays is not None and number_lhe_scaleweights > 3 else ak.Array([0.]),
"total_lhe_scaleweights_4": ak.Array([ak.sum(lhe_scale_w_arrays[:, 4] * events.genWeight)]) if lhe_pdf_w_arrays is not None and number_lhe_scaleweights > 4 else ak.Array([0.]),
"total_lhe_scaleweights_5": ak.Array([ak.sum(lhe_scale_w_arrays[:, 5] * events.genWeight)]) if lhe_pdf_w_arrays is not None and number_lhe_scaleweights > 5 else ak.Array([0.]),
"total_lhe_scaleweights_6": ak.Array([ak.sum(lhe_scale_w_arrays[:, 6] * events.genWeight)]) if lhe_pdf_w_arrays is not None and number_lhe_scaleweights > 6 else ak.Array([0.]),
"total_lhe_scaleweights_7": ak.Array([ak.sum(lhe_scale_w_arrays[:, 7] * events.genWeight)]) if lhe_pdf_w_arrays is not None and number_lhe_scaleweights > 7 else ak.Array([0.]),
"total_lhe_scaleweights_8": ak.Array([ak.sum(lhe_scale_w_arrays[:, 8] * events.genWeight)]) if lhe_pdf_w_arrays is not None and number_lhe_scaleweights > 8 else ak.Array([0.]),
"total_psweights_0": ak.Array([ak.sum(ps_w_arrays[:, 0] * events.genWeight)]) if ps_w_arrays is not None and number_of_psweights > 0 else ak.Array([0.]),
"total_psweights_1": ak.Array([ak.sum(ps_w_arrays[:, 1] * events.genWeight)]) if ps_w_arrays is not None and number_of_psweights > 1 else ak.Array([0.]),
"total_psweights_2": ak.Array([ak.sum(ps_w_arrays[:, 2] * events.genWeight)]) if ps_w_arrays is not None and number_of_psweights > 2 else ak.Array([0.]),
"total_psweights_3": ak.Array([ak.sum(ps_w_arrays[:, 3] * events.genWeight)]) if ps_w_arrays is not None and number_of_psweights > 3 else ak.Array([0.]),
}
# transfer file for bookkeeping
transfer_command = f"xrdcp -p --silent {local_outfile_path} {outfile_path}"
os.system(transfer_command)
os.system(f"rm {local_outfile_path}")
return {dataset: 0}
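The `sumw` tree above stores, besides the usual positive/negative event counters, one summed weight per LHE scale and parton-shower variation, so normalization and scale/PS envelopes can still be built after the event selection has thrown events away. The diff spells every entry out because uproot wants a fixed branch structure; the arithmetic can be summarised in a loop-based sketch (illustrative, not the PR's literal code), assuming the standard 9 LHE scale and 4 PS variations of NanoAOD:

```python
# Loop-based sketch of the per-variation sum-of-weights bookkeeping.
import awkward as ak

def sumw_record(genw, lhe_scale_w, ps_w, n_scale=9, n_ps=4):
    rec = {
        "total_events": ak.Array([len(genw)]),
        "total_pos_events": ak.Array([ak.sum(genw > 0)]),
        "total_neg_events": ak.Array([-1.0 * ak.sum(genw < 0)]),
        "total_wei_events": ak.Array([ak.sum(genw)]),
        "total_poswei_events": ak.Array([ak.sum(genw[genw > 0.0])]),
        "total_negwei_events": ak.Array([ak.sum(genw[genw < 0.0])]),
    }
    # number of variations actually present in this sample (0 if branch missing)
    n_avail_scale = len(lhe_scale_w[0]) if lhe_scale_w is not None and len(lhe_scale_w) else 0
    n_avail_ps = len(ps_w[0]) if ps_w is not None and len(ps_w) else 0
    for i in range(n_scale):
        w = ak.sum(lhe_scale_w[:, i] * genw) if i < n_avail_scale else 0.0
        rec[f"total_lhe_scaleweights_{i}"] = ak.Array([w])
    for i in range(n_ps):
        w = ak.sum(ps_w[:, i] * genw) if i < n_avail_ps else 0.0
        rec[f"total_psweights_{i}"] = ak.Array([w])
    return rec

genw = ak.Array([1.0, -1.0, 1.0])
scale = ak.Array([[1.0] * 9, [0.9] * 9, [1.1] * 9])
ps = ak.Array([[1.0, 0.95, 1.05, 1.0]] * 3)
print(sumw_record(genw, scale, ps)["total_wei_events"])  # summed genWeight, here [1]
```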

output = {
@@ -563,7 +605,7 @@ def process_shift(self, events, shift_name):
output["ttbar_ps_w"] = ps_w_arrays[passEvent]

# customize output file name: <dataset>_<nanoAOD file name>_<chunk index>.root
with uproot.recreate(fname) as fout:
with uproot.recreate(local_outfile_path) as fout:
output_root = {}
for bname in output.keys():
if not output[bname].fields:
@@ -577,6 +619,16 @@
if isRealData:
fout["sumw"] = {"total_events": ak.Array([len(events)])}
else:
# add counters for systematic variations of LHE and parton-shower weights
# (this is not pretty, but uproot wants a fixed branch structure)
try:
number_lhe_scaleweights = len(lhe_scale_w_arrays[0])
except (TypeError, IndexError):
number_lhe_scaleweights = 0
try:
number_of_psweights = len(ps_w_arrays[0])
except (TypeError, IndexError):
number_of_psweights = 0
fout["sumw"] = {
"total_events": ak.Array([len(events)]),
"total_pos_events": ak.Array([ak.sum(events.genWeight > 0)]),
Expand All @@ -588,11 +640,23 @@ def process_shift(self, events, shift_name):
"total_negwei_events": ak.Array(
[ak.sum(events.genWeight[events.genWeight < 0.0])]
),
"total_lhe_scaleweights_0": ak.Array([ak.sum(lhe_scale_w_arrays[:, 0] * events.genWeight)]) if lhe_pdf_w_arrays is not None and number_lhe_scaleweights > 0 else ak.Array([0.]),
"total_lhe_scaleweights_1": ak.Array([ak.sum(lhe_scale_w_arrays[:, 1] * events.genWeight)]) if lhe_pdf_w_arrays is not None and number_lhe_scaleweights > 1 else ak.Array([0.]),
"total_lhe_scaleweights_2": ak.Array([ak.sum(lhe_scale_w_arrays[:, 2] * events.genWeight)]) if lhe_pdf_w_arrays is not None and number_lhe_scaleweights > 2 else ak.Array([0.]),
"total_lhe_scaleweights_3": ak.Array([ak.sum(lhe_scale_w_arrays[:, 3] * events.genWeight)]) if lhe_pdf_w_arrays is not None and number_lhe_scaleweights > 3 else ak.Array([0.]),
"total_lhe_scaleweights_4": ak.Array([ak.sum(lhe_scale_w_arrays[:, 4] * events.genWeight)]) if lhe_pdf_w_arrays is not None and number_lhe_scaleweights > 4 else ak.Array([0.]),
"total_lhe_scaleweights_5": ak.Array([ak.sum(lhe_scale_w_arrays[:, 5] * events.genWeight)]) if lhe_pdf_w_arrays is not None and number_lhe_scaleweights > 5 else ak.Array([0.]),
"total_lhe_scaleweights_6": ak.Array([ak.sum(lhe_scale_w_arrays[:, 6] * events.genWeight)]) if lhe_pdf_w_arrays is not None and number_lhe_scaleweights > 6 else ak.Array([0.]),
"total_lhe_scaleweights_7": ak.Array([ak.sum(lhe_scale_w_arrays[:, 7] * events.genWeight)]) if lhe_pdf_w_arrays is not None and number_lhe_scaleweights > 7 else ak.Array([0.]),
"total_lhe_scaleweights_8": ak.Array([ak.sum(lhe_scale_w_arrays[:, 8] * events.genWeight)]) if lhe_pdf_w_arrays is not None and number_lhe_scaleweights > 8 else ak.Array([0.]),
"total_psweights_0": ak.Array([ak.sum(ps_w_arrays[:, 0] * events.genWeight)]) if ps_w_arrays is not None and number_of_psweights > 0 else ak.Array([0.]),
"total_psweights_1": ak.Array([ak.sum(ps_w_arrays[:, 1] * events.genWeight)]) if ps_w_arrays is not None and number_of_psweights > 1 else ak.Array([0.]),
"total_psweights_2": ak.Array([ak.sum(ps_w_arrays[:, 2] * events.genWeight)]) if ps_w_arrays is not None and number_of_psweights > 2 else ak.Array([0.]),
"total_psweights_3": ak.Array([ak.sum(ps_w_arrays[:, 3] * events.genWeight)]) if ps_w_arrays is not None and number_of_psweights > 3 else ak.Array([0.]),
}
os.system(
f"xrdcp -p --silent {fname} root://eoscms.cern.ch//eos/cms/store/group/phys_btag/milee/BTA_ttbar/{self._campaign.replace('Run3','')}/{fname}"
)
os.system(f"rm {fname}")
transfer_command = f"xrdcp -p --silent {local_outfile_path} {outfile_path}"
os.system(transfer_command)
os.system(f"rm {local_outfile_path}")

return {dataset: len(events)}
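One more hedged remark on the weight counting used in both `sumw` blocks: since NanoAOD samples typically store the same number of `LHEScaleWeight`/`PSWeight` entries for every event, the per-sample multiplicity can also be read off with `ak.num` instead of taking the length of the first event:

```python
# Sketch of counting per-event weight multiplicity with ak.num (an
# alternative way to obtain number_of_psweights / number_lhe_scaleweights).
import awkward as ak

ps_w_arrays = ak.Array([[1.0, 0.9, 1.1, 1.0]] * 5)

counts = ak.num(ps_w_arrays, axis=1)           # [4, 4, 4, 4, 4]
number_of_psweights = counts[0] if len(counts) else 0
print(number_of_psweights)                     # 4
```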

3 changes: 1 addition & 2 deletions test_env.yml
@@ -1,7 +1,7 @@
name: btv_coffea
channels:
- conda-forge
- defaults
- nodefaults
dependencies:
- python[version='<=3.10']
- voms
@@ -19,4 +19,3 @@ dependencies:
- arrow
- dask-jobqueue
- xgboost
