Last active
July 11, 2023 09:09
-
-
Save riga/0cf3cbeade2e577c24ae92b14da15d22 to your computer and use it in GitHub Desktop.
Evaluation script for KLUB samples
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# coding: utf-8 | |
from __future__ import annotations | |
import os | |
from fnmatch import fnmatch | |
from multiprocessing import Pool as ProcessPool | |
from typing import Any | |
from tqdm import tqdm | |
import numpy as np | |
import awkward as ak | |
import uproot | |
# | |
# configurations | |
# | |
# mass points of the configuration (unit is not stated in this file; presumably GeV -- TODO confirm)
masses = [
    250, 260, 270, 280, 300,
    320, 350, 400, 450, 500,
    550, 600, 650, 700, 750,
    800, 850, 900, 1000, 1250,
    1500, 1750, 2000, 2500, 3000,
]

# spin values of the configuration
spins = [0, 2]

# selection string passed as the "cut" when reading the input tree;
# the individual requirements are combined with a logical AND
baseline_selection = " & ".join((
    "isLeptrigger",
    "((pairType == 0) | (pairType == 1) | (pairType == 2))",
    "(nleps == 0)",
    "(nbjetscand > 1)",
))

# branches that are copied from the input into the output file
klub_index_columns = ["EventNumber", "RunNumber", "lumi"]

# TODO: add the full list of required klub input branches here
klub_input_columns = []
# | |
# NN evaluation | |
# | |
def evaluate_events(events: ak.Array) -> ak.Array:
    """
    Placeholder for the network evaluation of *events*.

    Currently returns a record array with a single field ``"dnn_output"`` that is filled with
    ones, one entry per event.
    """
    # TODO: add actual evaluation and return an ak array with results
    n_events = len(events)
    dummy_output = np.ones(n_events)
    return ak.zip({"dnn_output": dummy_output})
# | |
# high-level evaluation functions | |
# | |
def evaluate_samples(
    skim_directory: str,
    output_directory: str,
    n_parallel: int = 1,
) -> None:
    """
    Evaluates all samples found in *skim_directory*.

    An entry of *skim_directory* counts as a sample when it is a directory that contains a file
    named "output_0.root". Each sample is forwarded to :py:func:`evaluate_sample` together with
    *output_directory* and *n_parallel*.
    """
    def is_sample(name: str) -> bool:
        # a sample is a sub directory holding at least the first output file
        candidate_dir = os.path.join(skim_directory, name)
        marker_file = os.path.join(candidate_dir, "output_0.root")
        return os.path.isdir(candidate_dir) and os.path.exists(marker_file)

    # collect the names of all samples in the klub directory
    sample_names = [name for name in os.listdir(skim_directory) if is_sample(name)]

    # run the evaluation sample by sample
    print(f"evaluating {len(sample_names)} samples")
    for name in sample_names:
        evaluate_sample(skim_directory, output_directory, name, n_parallel=n_parallel)
def evaluate_sample(
    skim_directory: str,
    output_directory: str,
    sample_name: str,
    n_parallel: int = 1,
) -> None:
    """
    Evaluates all files of the sample *sample_name* located in *skim_directory*.

    Every file in the sample directory whose name matches "output_*.root" is evaluated and the
    result is stored under the same file name in a directory named *sample_name* inside
    *output_directory* (user and environment variables in the output path are expanded). The
    output directory is created when missing. When *n_parallel* is larger than one, files are
    processed by a pool of that many processes, otherwise sequentially in this process.
    """
    print(f"evaluate {sample_name} ...")

    # ensure that the output directory exists
    # (exist_ok avoids the race between an existence check and the creation, e.g. when several
    # jobs that write to the same location are started at the same time)
    output_sample_dir = os.path.join(output_directory, sample_name)
    output_sample_dir = os.path.expandvars(os.path.expanduser(output_sample_dir))
    os.makedirs(output_sample_dir, exist_ok=True)

    # determine all file names to load, as pairs of (input path, output path)
    input_sample_dir = os.path.join(skim_directory, sample_name)
    evaluation_args = [
        (os.path.join(input_sample_dir, file_name), os.path.join(output_sample_dir, file_name))
        for file_name in os.listdir(input_sample_dir)
        if fnmatch(file_name, "output_*.root")
    ]

    # potentially run in parallel
    # (the iterators are lazy, wrapping them in list() is what actually drives the evaluation
    # while tqdm shows the progress)
    if n_parallel > 1:
        with ProcessPool(n_parallel) as pool:
            list(tqdm(
                pool.imap(_evaluate_file_mp, evaluation_args),
                total=len(evaluation_args),
            ))
    else:
        list(tqdm(
            map(_evaluate_file_mp, evaluation_args),
            total=len(evaluation_args),
        ))

    print("done")
def evaluate_file(input_file_path: str, output_file_path: str) -> None:
    """
    Evaluates the events of a single input file and writes the results to a new file.

    The tree "HTauTauTree" is read from *input_file_path*, restricted to the index and input
    columns and to events passing the baseline selection. The events are passed to
    :py:func:`evaluate_events`, the index columns are attached to its result, and all fields are
    written under the key "evaluation" into *output_file_path*, which is (re)created.
    """
    # prepare expressions
    expressions = klub_index_columns + klub_input_columns

    # load the klub array, closing the input file right after reading
    # (the arrays are fully loaded into memory by the arrays() call)
    with uproot.open(input_file_path) as f:
        input_array = f["HTauTauTree"].arrays(expressions=expressions, cut=baseline_selection)

    # run the evaluation
    output_array = evaluate_events(input_array)

    # add index columns so that output entries can be matched to input events
    for c in klub_index_columns:
        output_array = ak.with_field(output_array, input_array[c], c)

    # save the output as root, making sure the file is closed (and thus completely written)
    # even if the write operation fails
    with uproot.recreate(output_file_path) as output_file:
        output_file["evaluation"] = dict(zip(output_array.fields, ak.unzip(output_array)))
def _evaluate_file_mp(args: Any) -> None:
    """
    Adapter around :py:func:`evaluate_file` for use with ``map``-like functions that pass a
    single argument: *args* is unpacked into the positional arguments of the wrapped function.
    """
    result = evaluate_file(*args)
    return result
# entry hook
if __name__ == "__main__":
    # locations of the input skims and of the evaluation outputs
    skim_dir = "/eos/user/t/tokramer/hhbbtautau/skims/2017"
    out_dir = "/eos/user/m/mrieger/hhres_dnn_datacards/nn/2017"

    # to process every sample found in the skim directory, use instead:
    # evaluate_samples(
    #     skim_directory=skim_dir,
    #     output_directory=out_dir,
    #     n_parallel=1,
    # )

    # evaluate a single sample
    evaluate_sample(
        skim_directory=skim_dir,
        output_directory=out_dir,
        sample_name="SKIM_ggF_Radion_m900",
        n_parallel=1,
    )
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment