diff --git a/analyzers/dataframe/FCCAnalyses/JetClusteringUtils.h b/analyzers/dataframe/FCCAnalyses/JetClusteringUtils.h index 4259a8c5ef..195e4971c9 100644 --- a/analyzers/dataframe/FCCAnalyses/JetClusteringUtils.h +++ b/analyzers/dataframe/FCCAnalyses/JetClusteringUtils.h @@ -23,12 +23,26 @@ namespace JetClusteringUtils{ const int Nmax_dmerge = 10; // maximum number of d_{n, n+1} that are kept in FCCAnalysesJet + struct flav_details{ + ROOT::VecOps::RVec parton_flavour; + ROOT::VecOps::RVec hadron_flavour; + ROOT::VecOps::RVec ghost_pseudojets; + std::vector> ghost_jetconstituents; + ROOT::VecOps::RVec ghostStatus; + ROOT::VecOps::RVec MCindex; + }; + /** Structure to keep useful informations for the jets*/ struct FCCAnalysesJet{ + TString clustering_algo; + ROOT::VecOps::RVec clustering_params; ROOT::VecOps::RVec jets; std::vector> constituents; std::vector exclusive_dmerge; // vector of Nmax_dmerge values associated with merging from n + 1 to n jets, for n =1, 2, ... 10 std::vector exclusive_dmerge_max ; + ROOT::VecOps::RVec flavour; + //flav_details *flavour_details; + flav_details flavour_details; }; /** Set fastjet pseudoJet for later reconstruction*/ @@ -94,11 +108,13 @@ namespace JetClusteringUtils{ ///Internal methods - FCCAnalysesJet initialise_FCCAnalysesJet(); + FCCAnalysesJet initialise_FCCAnalysesJet(TString clustering_algo="default_string", ROOT::VecOps::RVec clustering_params={}); FCCAnalysesJet build_FCCAnalysesJet(const std::vector &in, const std::vector &dmerge, - const std::vector &dmerge_max); + const std::vector &dmerge_max, + TString clustering_algo="default_string", + ROOT::VecOps::RVec clustering_params={}); std::vector build_jets(fastjet::ClusterSequence & cs, int exclusive, float cut, diff --git a/analyzers/dataframe/FCCAnalyses/JetTaggingUtils.h b/analyzers/dataframe/FCCAnalyses/JetTaggingUtils.h index ba457cd43f..3943976bb8 100644 --- a/analyzers/dataframe/FCCAnalyses/JetTaggingUtils.h +++ b/analyzers/dataframe/FCCAnalyses/JetTaggingUtils.h @@ -7,6 +7,10 @@ #include "edm4hep/MCParticleData.h" #include "fastjet/JetDefinition.hh" #include "TRandom3.h" +#include "MCParticle.h" +#include "JetClusteringUtils.h" +#include "JetClustering.h" + /** Jet tagging utilities interface. This represents a set functions and utilities to perfom jet tagging from a list of jets. @@ -21,6 +25,24 @@ namespace JetTaggingUtils{ //Get flavour association of jet ROOT::VecOps::RVec get_flavour(ROOT::VecOps::RVec in, ROOT::VecOps::RVec MCin); + + ROOT::VecOps::RVec find_ghosts(const ROOT::VecOps::RVec & Particle, + const ROOT::VecOps::RVec & ind); + + JetClusteringUtils::FCCAnalysesJet set_flavour(const ROOT::VecOps::RVec & Particle, + const ROOT::VecOps::RVec & MCindices, + JetClusteringUtils::FCCAnalysesJet & jets, + std::vector & pseudoJets); + + ROOT::VecOps::RVec get_flavour(const JetClusteringUtils::FCCAnalysesJet & jets); + + ROOT::VecOps::RVec get_flavour(const ROOT::VecOps::RVec & Particle, + const ROOT::VecOps::RVec & ind, + JetClusteringUtils::FCCAnalysesJet & jets, + std::vector & pseudoJets); + + JetClusteringUtils::flav_details get_flavour_details(const JetClusteringUtils::FCCAnalysesJet & jets); + //Get b-tags with an efficiency applied ROOT::VecOps::RVec get_btag(ROOT::VecOps::RVec in, float efficiency, float mistag_c=0., float mistag_l=0., float mistag_g=0.); //Get c-tags with an efficiency applied diff --git a/analyzers/dataframe/src/JetClustering.cc b/analyzers/dataframe/src/JetClustering.cc index 8aeb0e865b..480587e949 100644 --- a/analyzers/dataframe/src/JetClustering.cc +++ b/analyzers/dataframe/src/JetClustering.cc @@ -161,15 +161,17 @@ JetClusteringUtils::FCCAnalysesJet clustering_ee_kt::operator() (const std::vect if (JetClusteringUtils::check(input.size(),_exclusive, _cut)==false) return JetClusteringUtils::initialise_FCCAnalysesJet(); _cs = fastjet::ClusterSequence(input, _def); - + //cluster jets std::vector pjets = JetClusteringUtils::build_jets(_cs, _exclusive, _cut, _sorted); //get dmerged elements std::vector dmerge = JetClusteringUtils::exclusive_dmerge(_cs, 0); std::vector dmerge_max = JetClusteringUtils::exclusive_dmerge(_cs, 1); + ROOT::VecOps::RVec clustering_params{float(_exclusive), _cut, float(_sorted), float(_recombination)}; + //transform to FCCAnalysesJet - JetClusteringUtils::FCCAnalysesJet result = JetClusteringUtils::build_FCCAnalysesJet(pjets, dmerge, dmerge_max ); + JetClusteringUtils::FCCAnalysesJet result = JetClusteringUtils::build_FCCAnalysesJet(pjets, dmerge, dmerge_max, "ee-kt", clustering_params); return result; } diff --git a/analyzers/dataframe/src/JetClusteringUtils.cc b/analyzers/dataframe/src/JetClusteringUtils.cc index d2139f9ead..6f2fa484ea 100644 --- a/analyzers/dataframe/src/JetClusteringUtils.cc +++ b/analyzers/dataframe/src/JetClusteringUtils.cc @@ -137,7 +137,7 @@ ROOT::VecOps::RVec get_theta(const ROOT::VecOps::RVec -FCCAnalysesJet initialise_FCCAnalysesJet(){ +FCCAnalysesJet initialise_FCCAnalysesJet(TString clustering_algo, ROOT::VecOps::RVec clustering_params){ FCCAnalysesJet result; ROOT::VecOps::RVec jets; @@ -145,6 +145,8 @@ FCCAnalysesJet initialise_FCCAnalysesJet(){ result.jets = jets; result.constituents = constituents; + result.clustering_algo = clustering_algo; + result.clustering_params = clustering_params; std::vector exclusive_dmerge; std::vector exclusive_dmerge_max; @@ -159,9 +161,11 @@ FCCAnalysesJet initialise_FCCAnalysesJet(){ FCCAnalysesJet build_FCCAnalysesJet(const std::vector &in, const std::vector &dmerge, - const std::vector &dmerge_max){ + const std::vector &dmerge_max, + TString clustering_algo, + ROOT::VecOps::RVec clustering_params){ - FCCAnalysesJet result = initialise_FCCAnalysesJet(); + FCCAnalysesJet result = initialise_FCCAnalysesJet(clustering_algo, clustering_params); for (const auto& pjet : in) { result.jets.push_back(pjet); diff --git a/analyzers/dataframe/src/JetTaggingUtils.cc b/analyzers/dataframe/src/JetTaggingUtils.cc index 2600cd3d3f..154ee94ada 100644 --- a/analyzers/dataframe/src/JetTaggingUtils.cc +++ b/analyzers/dataframe/src/JetTaggingUtils.cc @@ -63,6 +63,257 @@ ROOT::VecOps::RVec get_flavour(ROOT::VecOps::RVec in, return result; } +ROOT::VecOps::RVec find_ghosts(const ROOT::VecOps::RVec & Particle, + const ROOT::VecOps::RVec & ind) { + + ROOT::VecOps::RVec MCindices; + + std::vector partonStatus = {20, 30}; + + int partonFlag; + + // In loop below ghosts are selected from MCParticle collection + for (size_t i = 0; i < Particle.size(); ++i) { + bool isGhost = false; + + + // Ghost partons as any partons that do not have partons as daughters + if (std::abs(Particle[i].PDG)<=5||Particle[i].PDG==21) { + isGhost = true; + auto daughters = MCParticle::get_list_of_particles_from_decay(i, Particle, ind); + for(auto& daughter_index : daughters){ + if (std::abs(Particle[daughter_index].PDG)<=5||Particle[daughter_index].PDG==21) { + isGhost = false; + break; + } + } + } + + + // Ghost hadrons are selected as b/c hadrons that do not have b/c hadrons as daughters + if ((std::abs(int((Particle[i].PDG/100))%10)==5)||(std::abs(int((Particle[i].PDG/1000))%10)==5)){ + isGhost = true; + auto daughters = MCParticle::get_list_of_particles_from_decay(i, Particle, ind); + for(auto& daughter_index : daughters){ + if((std::abs(int((Particle[daughter_index].PDG/100))%10)==5)||(std::abs(int((Particle[daughter_index].PDG/1000))%10)==5)){ + isGhost = false; + break; + } + } + + } + else if ((std::abs(int((Particle[i].PDG/100))%10)==4)||(std::abs(int((Particle[i].PDG/1000))%10)==4)){ + isGhost = true; + auto daughters = MCParticle::get_list_of_particles_from_decay(i, Particle, ind); + for(auto& daughter_index : daughters){ + if((std::abs(int((Particle[daughter_index].PDG/100))%10)==4)||(std::abs(int((Particle[daughter_index].PDG/1000))%10)==4)){ + isGhost = false; + break; + } + } + } + if (isGhost) MCindices.push_back(i); + } + return MCindices; +} + +JetClusteringUtils::FCCAnalysesJet set_flavour(const ROOT::VecOps::RVec & Particle, + const ROOT::VecOps::RVec & MCindices, + JetClusteringUtils::FCCAnalysesJet & jets, + std::vector & pseudoJets) { + unsigned int index = pseudoJets.size(); + ROOT::VecOps::RVec pdg(pseudoJets.size(),0); + ROOT::VecOps::RVec ghostStatus(pseudoJets.size(),0); + ROOT::VecOps::RVec MCindex(pseudoJets.size(),-1); + + ROOT::VecOps::RVec partonStatus = {20, 30}; + + // In loop below ghosts are selected from MCParticle collection + for (auto & i : MCindices) { + + // Ghost partons as any partons that do not have partons as daughters + if (std::abs(Particle[i].PDG)<=5||Particle[i].PDG==21) { + ghostStatus.push_back(1); + } + + + // Ghost hadrons are selected as b/c hadrons that do not have b/c hadrons as daughters + else if ((std::abs(int((Particle[i].PDG/100))%10)==5)||(std::abs(int((Particle[i].PDG/1000))%10)==5)){ + ghostStatus.push_back(2); + + } + + else if ((std::abs(int((Particle[i].PDG/100))%10)==4)||(std::abs(int((Particle[i].PDG/1000))%10)==4)){ + ghostStatus.push_back(2); + } + else{ + // This should never be executed and would indicate a bug in ghost finding + return jets; + } + + // Ghosts 4-mom is scaled by 10^-18 + + pdg.push_back(Particle[i].PDG); + MCindex.push_back(i); + // the double conversion here is verbose but for precision if I recall... + double px = Particle[i].momentum.x;//*pow(10, -1); + double py = Particle[i].momentum.y; + double pz = Particle[i].momentum.z; + double m = Particle[i].mass; + double E = sqrt(px*px + py*py + pz*pz + m*m); + pseudoJets.emplace_back(px*pow(10, -18), py*pow(10, -18), pz*pow(10, -18), E*pow(10, -18)); + pseudoJets.back().set_user_index(index); + ++index; + + } + JetClusteringUtils::flav_details flavour_details; + + flavour_details.ghostStatus = ghostStatus; + + flavour_details.MCindex = MCindex; + + + // Jet clustering algorithm is run according to user choice m_algo + JetClusteringUtils::FCCAnalysesJet FCCAnalysesGhostJets; + std::vector pseudoJets_(pseudoJets.begin(), pseudoJets.end()); + if (jets.clustering_algo == "ee-kt") { + FCCAnalysesGhostJets = JetClustering::clustering_ee_kt(jets.clustering_params[0], jets.clustering_params[1], jets.clustering_params[2], jets.clustering_params[3])(pseudoJets_); + } + /* + else if (jets.clustering_algo == "anti-kt") FCCAnalysesGhostJets = JetClustering::clustering_antikt(clustering_params[0], clustering_params[1], clustering_params[2], clustering_params[3], clustering_params[4])(pseudoJets); + else if (jets.clustering_algo == "cambridge") FCCAnalysesGhostJets = JetClustering::clustering_cambridge(clustering_params[0], clustering_params[1], clustering_params[2], clustering_params[3], clustering_params[4])(pseudoJets); + else if (jets.clustering_algo == "ee-kt") FCCAnalysesGhostJets = JetClustering::clustering_ee_kt(clustering_params[0], clustering_params[1], clustering_params[2], clustering_params[3])(pseudoJets); + else if (jets.clustering_algo == "ee-genkt") FCCAnalysesGhostJets = JetClustering::clustering_ee_genkt(clustering_params[0], clustering_params[1], clustering_params[2], clustering_params[3], clustering_params[4], m_add1)(pseudoJets); + else if (jets.clustering_algo == "genkt") FCCAnalysesGhostJets = JetClustering::clustering_genkt(clustering_params[0], clustering_params[1], clustering_params[2], clustering_params[3], clustering_params[4], clustering_params[5])(pseudoJets); + else if (jets.clustering_algo == "valencia") FCCAnalysesGhostJets = JetClustering::clustering_valencia(clustering_params[0], clustering_params[1], clustering_params[2], clustering_params[3], clustering_params[4], clustering_params[5], clustering_params[6])(pseudoJets); + else if (jets.clustering_algo == "jade") FCCAnalysesGhostJets = JetClustering::clustering_jade(clustering_params[0], clustering_params[1], clustering_params[2], clustering_params[3], clustering_params[4])(pseudoJets); + */ + else return jets; + + //result.jets = FCCAnalysesGhostJets; + flavour_details.ghost_pseudojets = FCCAnalysesGhostJets.jets; + flavour_details.ghost_jetconstituents = FCCAnalysesGhostJets.constituents; + + // Jet constituents and pseudojets are read from resulting jet struct + + auto ghostJets_ee_kt = JetClusteringUtils::get_pseudoJets(FCCAnalysesGhostJets); + + auto jetconstituents = JetClusteringUtils::get_constituents(FCCAnalysesGhostJets); + + + + + // Flav vector is defined before jets are checked for clustered ghosts + //std::vector> flav_vector; + ROOT::VecOps::RVec flav_vector; + + + ROOT::VecOps::RVec partonFlavs; + ROOT::VecOps::RVec hadronFlavs; + for (auto& consti_index : jetconstituents) { + int partonFlav = 0; + float partonMom2 = 0; + float partonMom2_b = 0; + float partonMom2_c = 0; + int hadronFlav = 0; + float hadronMom2_b = 0; + float hadronMom2_c = 0; + for (auto& i : consti_index) { + + //Parton-flav loop + if (ghostStatus[i]==1) { + if (std::abs(pdg[i])==5){ + if (pseudoJets[i].modp2()>partonMom2_b){ + partonFlav = pdg[i]; + partonMom2_b = pseudoJets[i].modp2(); + } + } + else if ((std::abs(pdg[i])==4) && (std::abs(partonFlav)<5)) { + if (pseudoJets[i].modp2()>partonMom2_c){ + partonFlav = pdg[i]; + partonMom2_c = pseudoJets[i].modp2(); + } + } + else if (((std::abs(pdg[i])==3) || (std::abs(pdg[i])==2) || (std::abs(pdg[i])==1) || (std::abs(pdg[i])==21)) && ((std::abs(partonFlav)<4) || (std::abs(partonFlav)==21))) { + if (pseudoJets[i].modp2()>partonMom2){ + partonFlav = pdg[i]; + partonMom2 = pseudoJets[i].modp2(); + } + } + } + + // Hadron-flav loop + if (ghostStatus[i]==2) { + if ((std::abs(int((pdg[i]/100))%10)==5)||(std::abs(int((pdg[i]/1000))%10)==5)){ + if (pseudoJets[i].modp2()>hadronMom2_b){ + hadronFlav = ((pdg[i]<0)-(pdg[i]>0))*5; + hadronMom2_b = pseudoJets[i].modp2(); + } + } + else if (((std::abs(int((pdg[i]/100))%10)==4)||(std::abs(int((pdg[i]/1000))%10)==4)) && (std::abs(hadronFlav)<5)) { + if (pseudoJets[i].modp2()>hadronMom2_c){ + hadronFlav = ((pdg[i]>0)-(pdg[i]<0))*4; + hadronMom2_c = pseudoJets[i].modp2(); + } + } + } + } + partonFlavs.push_back(partonFlav); + hadronFlavs.push_back(hadronFlav); + if ((std::abs(hadronFlav)>=4)){ + flav_vector.push_back(hadronFlav); + } + else if ((hadronFlav<=3) && ((partonFlav<=3)||(partonFlav==21))){ + flav_vector.push_back(partonFlav); + } + else{ + flav_vector.push_back(0); + } + + } + + flavour_details.parton_flavour = partonFlavs; + flavour_details.hadron_flavour = hadronFlavs; + + //jets.flavour_details = &flavour_details; + jets.flavour_details = flavour_details; + + auto & pseudojets = jets.jets; + auto & ghost_pseudojets = jets.flavour_details.ghost_pseudojets; + + for(int i=0; i get_flavour(const JetClusteringUtils::FCCAnalysesJet & jets){ + return jets.flavour; +} + +JetClusteringUtils::flav_details get_flavour_details(const JetClusteringUtils::FCCAnalysesJet & jets){ + return jets.flavour_details; +} + +ROOT::VecOps::RVec get_flavour(const ROOT::VecOps::RVec & Particle, + const ROOT::VecOps::RVec & ind, + JetClusteringUtils::FCCAnalysesJet & jets, + std::vector & pseudoJets){ + auto MCindices = JetTaggingUtils::find_ghosts(Particle, ind); + jets = JetTaggingUtils::set_flavour(Particle, MCindices, jets, pseudoJets); + + return get_flavour(jets); + +} + + ROOT::VecOps::RVec get_btag(ROOT::VecOps::RVec in, float efficiency, float mistag_c, diff --git a/examples/FCCee/coffea/analysis.py b/examples/FCCee/coffea/analysis.py new file mode 100644 index 0000000000..95deda4951 --- /dev/null +++ b/examples/FCCee/coffea/analysis.py @@ -0,0 +1,136 @@ +import sys +import ROOT + +print ("Load cxx analyzers ... ",) +ROOT.gSystem.Load("libedm4hep") +ROOT.gSystem.Load("libpodio") +ROOT.gSystem.Load("libFCCAnalyses") +ROOT.gErrorIgnoreLevel = ROOT.kFatal +_edm = ROOT.edm4hep.ReconstructedParticleData() +_pod = ROOT.podio.ObjectID() +_fcc = ROOT.dummyLoader + +print ('edm4hep ',_edm) +print ('podio ',_pod) +print ('fccana ',_fcc) + +class analysis(): + + #__________________________________________________________ + def __init__(self, inputlist, outname, ncpu): + self.outname = outname + if ".root" not in outname: + self.outname+=".root" + + ROOT.ROOT.EnableImplicitMT(ncpu) + + self.df = ROOT.RDataFrame("events", inputlist) + print (" done") + #__________________________________________________________ + def run(self): + df2 = (self.df + + .Alias("Particle0", "Particle#0.index") + #.Alias("Particle_index", "Particle.index") + #.Alias("Particle1", "Particle#1.index") + .Alias("Jet3", "Jet#3.index") #alias for dealing with # in python + + + .Define("MC_px", "MCParticle::get_px(Particle)") + .Define("MC_py", "MCParticle::get_py(Particle)") + .Define("MC_pz", "MCParticle::get_pz(Particle)") + .Define("MC_p", "MCParticle::get_p(Particle)") + .Define("MC_e", "MCParticle::get_e(Particle)") + .Define("MC_theta", "MCParticle::get_theta(Particle)") + .Define("MC_pdg", "MCParticle::get_pdg(Particle)") + .Define("MC_status", "MCParticle::get_genStatus(Particle)") + + #selecting only final particles (status=1) + .Define("MC_final", "MCParticle::sel_genStatus(1)(Particle)") + + #momenta etc of final particles + .Define("MC_px_f", "MCParticle::get_px(MC_final)") + .Define("MC_py_f", "MCParticle::get_py(MC_final)") + .Define("MC_pz_f", "MCParticle::get_pz(MC_final)") + .Define("MC_e_f", "MCParticle::get_e(MC_final)") + .Define("MC_theta_f", "MCParticle::get_theta(MC_final)") + .Define("MC_pdg_f", "MCParticle::get_pdg(MC_final)") + + + #EE-KT ALGORITHM + + #build psedo-jets with the MC final particles (status = 0) + .Define("pseudo_jets", "JetClusteringUtils::set_pseudoJets(MC_px_f, MC_py_f, MC_pz_f, MC_e_f)") + + #run jet clustering with final state MC particles. ee_kt_algorithm, exclusive clustering, exactly 2 jets, E-scheme + .Define("FCCAnalysesJets_ee_kt", "JetClustering::clustering_ee_kt(2, 2, 1, 0)(pseudo_jets)") + + #get the jets out of the structure + .Define("jets_ee_kt", "JetClusteringUtils::get_pseudoJets(FCCAnalysesJets_ee_kt)") + + #get the jet constituents out of the structure + .Define("jetconstituents_ee_kt", "JetClusteringUtils::get_constituents(FCCAnalysesJets_ee_kt)") + + #get some jet variables + .Define("jets_ee_kt_e", "JetClusteringUtils::get_e(jets_ee_kt)") + .Define("jets_ee_kt_px", "JetClusteringUtils::get_px(jets_ee_kt)") + .Define("jets_ee_kt_py", "JetClusteringUtils::get_py(jets_ee_kt)") + .Define("jets_ee_kt_pz", "JetClusteringUtils::get_pz(jets_ee_kt)") + .Define("jets_ee_kt_flavour", "JetTaggingUtils::get_flavour(jets_ee_kt, Particle)") + ) + + + + + # select branches for output file + branchList = ROOT.vector('string')() + for branchName in [ + "MC_px", + "MC_py", + "MC_pz", + "MC_p", + "MC_e", + "MC_theta", + "MC_pdg", + "MC_status", + + "MC_px_f", + "MC_py_f", + "MC_pz_f", + "MC_e_f", + "MC_theta_f", + "MC_pdg_f", + + "jets_ee_kt_e", + "jets_ee_kt_px", + "jets_ee_kt_py", + "jets_ee_kt_pz", + "jets_ee_kt_flavour", + "jetconstituents_ee_kt", + ]: + branchList.push_back(branchName) + df2.Snapshot("events", self.outname, branchList) + +# example call for standalone file +# python examples/FCCee/higgs/mH-recoil/mumu/analysis.py /eos/experiment/fcc/ee/generation/DelphesEvents/fcc_tmp/p8_ee_ZH_ecm240/events_058720051.root +if __name__ == "__main__": + + if len(sys.argv)==1: + print ("usage:") + print ("python ",sys.argv[0]," file.root") + sys.exit(3) + infile = sys.argv[1] + outDir = sys.argv[0].replace(sys.argv[0].split('/')[0],'outputs').replace('analysis.py','')+'/' + import os + os.system("mkdir -p {}".format(outDir)) + outfile = outDir+infile.split('/')[-1] + ncpus = 0 + print ('outfile ',outfile) + analysis = analysis(infile, outfile, ncpus) + analysis.run() + + tf = ROOT.TFile(infile) + entries = tf.events.GetEntries() + p = ROOT.TParameter(int)( "eventsProcessed", entries) + outf=ROOT.TFile(outfile,"UPDATE") + p.Write() diff --git a/examples/FCCee/coffea/finalSel_coffea.py b/examples/FCCee/coffea/finalSel_coffea.py new file mode 100644 index 0000000000..8555b39da6 --- /dev/null +++ b/examples/FCCee/coffea/finalSel_coffea.py @@ -0,0 +1,176 @@ +import uproot +import awkward as ak +import h5py as h5 +import numpy as np +import matplotlib.pyplot as plt +from collections import defaultdict +from coffea import hist, processor +from coffea.nanoevents.methods import vector + + +ak.behavior.update(vector.behavior) + + + +# As detailed in the coffea docs, the core of the analysis script is the Processor class defined below. The Processor.process function will be called on every batch being processed. + +class MyProcessor(processor.ProcessorABC): + def __init__(self): + + + # We begin by defining the accumulator which will hold all quantities we intend to output. In this case it will be only the Z invariant mass and the flavour of the quarks. + + Z_accumulator = { + 'Z_mass': processor.column_accumulator(np.zeros([0])), + 'pid': processor.column_accumulator(np.zeros([0])), + } + self._accumulator = processor.dict_accumulator( Z_accumulator ) + + @property + def accumulator(self): + return self._accumulator + + + + def process(self, events): + + output = self.accumulator.identity() + + + # Here, the jets are defined LorentzVector classes (meaning several attributes and operations, such as jets.eta, will be accessible) + + jets = ak.zip({ + "t": events.jets_ee_kt_e, + "x": events.jets_ee_kt_px, + "y": events.jets_ee_kt_py, + "z": events.jets_ee_kt_pz, + "partonFlavour": events.jets_ee_kt_flavour, + }, with_name="LorentzVector") + + + # The (stable) gen level particles are defined similarly, but with their corresponding arrays + + GenPartsf = ak.zip({ + "t": events.MC_e_f, + "x": events.MC_px_f, + "y": events.MC_py_f, + "z": events.MC_pz_f, + "pdg": events.MC_pdg_f, + }, with_name="LorentzVector") + + + + # Here the flavour mask is defined by requiring both the quark and antiquark initiated jets to have the same flavour (which is non-negative). Unmatched or incorrectly matched jets result in a flavour of 0. + + up_mask = (np.abs(jets.partonFlavour[:,0])==2)&(np.abs(jets.partonFlavour[:,1])==2) + down_mask = (np.abs(jets.partonFlavour[:,0])==1)&(np.abs(jets.partonFlavour[:,1])==1) + strange_mask = (np.abs(jets.partonFlavour[:,0])==3)&(np.abs(jets.partonFlavour[:,1])==3) + charm_mask = (np.abs(jets.partonFlavour[:,0])==4)&(np.abs(jets.partonFlavour[:,1])==4) + bottom_mask = (np.abs(jets.partonFlavour[:,0])==5)&(np.abs(jets.partonFlavour[:,1])==5) + + pid = 2*up_mask+down_mask+3*strange_mask+4*charm_mask+5*bottom_mask + + + + # jets[:,::2] selects only the leading jet of an event, while jets[:,1::2] selects the subleading. In this way the Z candidates are computed, having the same LorentzVector structure as the jets from which they are computed. + + Z_cand = jets[:,::2]+jets[:,1::2] + + + + + #---------------------------------------------------------------------------------------------------------------------------------------------------------------------- + ######################### + #ASIDE: Jet constituents# + ######################### + + # Though not relevant in this simple example, in most cases one will want to have access to the jet constituents clustered into the first and second jets. This is possible by passing events.jetconstituents_ee_kt as a mask to the (stable) gen level particles. The events.jetconstituents_ee_kt array contains the indices of each (stable) gen level particle clustered into a given jet, and can thus be used to define a new collection consisting only the the particles in a given jet. + + # For instance, the jet constituents of the leading jet are defined as + + firstjetGenPartsf = ak.copy(GenPartsf[events.jetconstituents_ee_kt[:,0]]) + + # While the jet consituents of the sub-leading jet are defined as + + secondjetGenPartsf = ak.copy(GenPartsf[events.jetconstituents_ee_kt[:,1]]) + #------------------------------------------------------------------------------------------------------------------------------------------------------------------------ + + # Here the Z candidates with undefined flavours are removed. Note that this syntax is general and could be used for any cuts e.g. kinematic + # Requiring a jet mass > 80 GeV would be + # Z_cand = Z_cand[Z_cand.mass>80] + + Z_cand = Z_cand[pid!=0] + pid = pid[pid!=0] + + + output['Z_mass']+= processor.column_accumulator(np.asarray(ak.flatten(Z_cand.mass))) + output['pid']+= processor.column_accumulator(np.asarray(pid)) + + return output + + def postprocess(self, accumulator): + return accumulator + +filename = "outputs/FCCee/coffea/p8_ee_Zuds_ecm91.root" +fileset = {'Zqq': [filename]} + +# The processor run_uproot_job loads the events via uproot and executes process function on the defined batches. The 'maxchunks' argument defined the maximum number of chunks that will be processed, while the 'chunksize' defines the number of events per chunk. The 'workers' argument defines how many parallel processes are run. + +output = processor.run_uproot_job(fileset, + treename='events', + processor_instance=MyProcessor(), + executor=processor.futures_executor, + executor_args={'schema': None, 'workers':4,}, + maxchunks =20, + chunksize = 5000, +) + + +########## +#PLOTTING# +########## + +Z_mass = output['Z_mass'].value +pid = output['pid'].value + +# Here the Z invariant mass is histogrammed using coffea histograms. Subsequently, the invariant mass and quark flavour is saved as an .h5 file. + +Z_cand = hist.Hist( + "Events", + hist.Cat("flavour", "Quark flavour"), + hist.Bin("m", r"$m_{q \bar{q}}$ [GeV]", 30, 90.75, 91.65), +) + +for i_f, flav in enumerate(["down", "up", "strange"]): + Z_cand.fill( + flavour=flav, + m=Z_mass[(pid==(i_f+1))], + ) + +ax = hist.plot1d( + Z_cand, + overlay="flavour", + stack=True, + fill_opts={'alpha': .5, 'edgecolor': (0,0,0,0.3)} +) + +ax.get_legend().shadow = True +ax.set_title(r'$Z \rightarrow q\bar{q}$ invariant mass') + +plt.savefig('examples/FCCee/coffea/Zpeak.pdf') + +h5_file = h5.File('examples/FCCee/coffea/Zpeak.h5', 'w') +h5_file.create_dataset("Z_mass", data=Z_mass) +h5_file.create_dataset("pid", data=pid) + + + + + + + + + + + + diff --git a/examples/FCCee/coffea/finalSel_coffea_png.py b/examples/FCCee/coffea/finalSel_coffea_png.py new file mode 100644 index 0000000000..ec15d74dcc --- /dev/null +++ b/examples/FCCee/coffea/finalSel_coffea_png.py @@ -0,0 +1,244 @@ +import uproot +import awkward as ak +import h5py as h5 +import numpy as np +import matplotlib.pyplot as plt +from collections import defaultdict +from coffea import hist, processor +from coffea.nanoevents.methods import vector + + +ak.behavior.update(vector.behavior) + + + +# As detailed in the coffea docs, the core of the analysis script is the Processor class defined below. The Processor.process function will be called on every batch being processed. + +class MyProcessor(processor.ProcessorABC): + def __init__(self): + + + # We begin by defining the accumulator which will hold all quantities we intend to output. In this case it will be only the Z invariant mass and the flavour of the quarks. + + Z_accumulator = { + 'Z_mass': processor.column_accumulator(np.zeros([0])), + 'pid': processor.column_accumulator(np.zeros([0])), + 'pid_Kl': processor.column_accumulator(np.zeros([0])), + 'Kl_p': processor.column_accumulator(np.zeros([0])), + 'dKl_theta': processor.column_accumulator(np.zeros([0])), + 'dKl_phi': processor.column_accumulator(np.zeros([0])), + } + self._accumulator = processor.dict_accumulator( Z_accumulator ) + + @property + def accumulator(self): + return self._accumulator + + + + def process(self, events): + + output = self.accumulator.identity() + + + # Here, the jets are defined LorentzVector classes (meaning several attributes and operations, such as jets.eta, will be accessible) + + jets = ak.zip({ + "t": events.jets_ee_kt_e, + "x": events.jets_ee_kt_px, + "y": events.jets_ee_kt_py, + "z": events.jets_ee_kt_pz, + "partonFlavour": events.jets_ee_kt_flavour, + }, with_name="LorentzVector") + + + # The (stable) gen level particles are defined similarly, but with their corresponding arrays + + GenPartsf = ak.zip({ + "t": events.MC_e_f, + "x": events.MC_px_f, + "y": events.MC_py_f, + "z": events.MC_pz_f, + "pdg": events.MC_pdg_f, + }, with_name="LorentzVector") + + + + # Here the flavour mask is defined by requiring both the quark and antiquark initiated jets to have the same flavour (which is non-negative). Unmatched or incorrectly matched jets result in a flavour of 0. + + up_mask = (np.abs(jets.partonFlavour[:,0])==2)&(np.abs(jets.partonFlavour[:,1])==2) + down_mask = (np.abs(jets.partonFlavour[:,0])==1)&(np.abs(jets.partonFlavour[:,1])==1) + strange_mask = (np.abs(jets.partonFlavour[:,0])==3)&(np.abs(jets.partonFlavour[:,1])==3) + charm_mask = (np.abs(jets.partonFlavour[:,0])==4)&(np.abs(jets.partonFlavour[:,1])==4) + bottom_mask = (np.abs(jets.partonFlavour[:,0])==5)&(np.abs(jets.partonFlavour[:,1])==5) + + pid = 2*up_mask+down_mask+3*strange_mask+4*charm_mask+5*bottom_mask + + + + + # Here cuts are implemented theta cut > ~ 14deg |p|>40 GeV + + jets_mask = (np.cos(jets.theta)<0.97) & (jets.p>40) + + event_mask = (ak.num(jets[jets_mask].theta, axis=1)==2) & (pid!=0) + + + jets_cut = ak.copy(jets[event_mask]) + GenPartsf_cut = ak.copy(GenPartsf[event_mask]) + pid = pid[event_mask] + + # A remark: It is important to cut on jet constituents after having assigned to constituents to their respective jet, as these cuts jet consitutent arrays will no longer match the indices + # given in events.jetconstituents_ee_kt. In this example we consider only the leading jet. + + # Here cuts are imposed on the jetConstituents of the leading jet theta cut > ~ 14 deg pt>500 GeV + leadingjetGenPartsf = ak.copy(GenPartsf[events.jetconstituents_ee_kt[:,0]]) + + leadingjetGenPartsf = leadingjetGenPartsf[event_mask] + + jetConstituents_mask = (np.cos(leadingjetGenPartsf.theta)<0.97) & (leadingjetGenPartsf.r>0.5) + leadingjetGenPartsf = leadingjetGenPartsf[jetConstituents_mask] + + + # Here we compute first the Z_candidates + + Z_cand = jets_cut[:,0] + jets_cut[:,1] + + # Here we compute |p| distribution of long Kaons in the leading jet + + longKaon_mask = (np.abs(leadingjetGenPartsf.pdg)==130) + longKaon_p = leadingjetGenPartsf[longKaon_mask].p + + # Here we compute the angular distance of long Kaons in the leading jet + + theta_diff = jets_cut.theta[:,0] - leadingjetGenPartsf[longKaon_mask].theta + phi_diff = jets_cut.phi[:,0] - leadingjetGenPartsf[longKaon_mask].phi + phi_diff = ((phi_diff+np.pi)%(2*np.pi))-np.pi + + + pid_Kl = ak.flatten(ak.broadcast_arrays(pid, phi_diff)[0], axis=None) + + output['Z_mass']+= processor.column_accumulator(np.asarray(Z_cand.mass)) + output['pid']+= processor.column_accumulator(np.asarray(pid)) + output['pid_Kl']+= processor.column_accumulator(np.asarray(pid_Kl)) + output['Kl_p']+= processor.column_accumulator(np.asarray(ak.flatten(longKaon_p))) + output['dKl_theta']+= processor.column_accumulator(np.asarray(ak.flatten(theta_diff))) + output['dKl_phi']+= processor.column_accumulator(np.asarray(ak.flatten(phi_diff))) + + return output + + def postprocess(self, accumulator): + return accumulator + +filename = "outputs/FCCee/Zqq/p8_ee_Zuds_ecm91.root" +fileset = {'Zqq': [filename]} + +# The processor run_uproot_job loads the events via uproot and executes process function on the defined batches. The 'maxchunks' argument defined the maximum number of chunks that will be processed, while the 'chunksize' defines the number of events per chunk. The 'workers' argument defines how many parallel processes are run. + +output = processor.run_uproot_job(fileset, + treename='events', + processor_instance=MyProcessor(), + executor=processor.futures_executor, + executor_args={'schema': None, 'workers':4,}, + maxchunks =20, + chunksize = 5000, +) + + +########## +#PLOTTING# +########## + +Z_mass = output['Z_mass'].value +pid = output['pid'].value +pid_Kl = output['pid_Kl'].value +Kl_p = output['Kl_p'].value +dKl_theta = output['dKl_theta'].value +dKl_phi = output['dKl_phi'].value + + +# Here the Z invariant mass is histogrammed using coffea histograms. + +Z_cand_hist = hist.Hist( + "Events", + hist.Cat("flavour", "Quark flavour"), + hist.Bin("m", r"$m_{q \bar{q}}$ [GeV]", 30, 90.75, 91.65), +) + +for i_f, flav in enumerate(["down", "up", "strange"]): + Z_cand_hist.fill( + flavour=flav, + m=Z_mass[(pid==(i_f+1))], + ) + +ax = hist.plot1d( + Z_cand_hist, + overlay="flavour", + stack=True, + fill_opts={'alpha': .5, 'edgecolor': (0,0,0,0.3)} +) + +ax.get_legend().shadow = True +ax.set_title(r'$Z \rightarrow q\bar{q}$ invariant mass') + +plt.savefig('examples/FCCee/Zqq/Zpeak.png') + +plt.clf() + +Kl_p_hist = hist.Hist( + "Constituent Count", + hist.Cat("flavour", "Quark flavour"), + hist.Bin("p", "|$p$| [GeV]", 38, 0, 37), +) + +for i_f, flav in enumerate(["down", "up", "strange"]): + Kl_p_hist.fill( + flavour=flav, + p=Kl_p[(pid_Kl==(i_f+1))], + ) + +ax = hist.plot1d( + Kl_p_hist, + overlay="flavour", + stack=True, + fill_opts={'alpha': .5, 'edgecolor': (0,0,0,0.3)} +) + + +ax.get_legend().shadow = True +ax.set_title(r'$K_{L}$ |p| distribution in $Z \rightarrow q\bar{q}$ leading jets') + +plt.savefig('examples/FCCee/Zqq/Kl_p.pdf') + +plt.clf() + + +Kl_angle_hist = hist.Hist( + "Constituent Count", + hist.Bin("theta", r"$\Delta \theta$ [rad]", 29, -0.5, 0.5), + hist.Bin("phi", r"$\Delta \phi$ [rad]", 29, -0.5, 0.5), +) + + +Kl_angle_hist.fill(theta=dKl_theta[(pid_Kl==3)], phi=dKl_phi[(pid_Kl==3)]) + +ax = hist.plot2d( + Kl_angle_hist, + xaxis = "phi", +) + +ax.set_title(r'$K_{L}$ distribution in $Z \rightarrow s\bar{s}$ leading jets') + +plt.savefig('examples/FCCee/Zqq/Kl_angle_hist.pdf') + + + + + + + + + + + + diff --git a/examples/FCCee/coffea/preSel.py b/examples/FCCee/coffea/preSel.py new file mode 100644 index 0000000000..640692488a --- /dev/null +++ b/examples/FCCee/coffea/preSel.py @@ -0,0 +1,20 @@ +#python examples/FCCee/KG/preSel.py + +from config.common_defaults import deffccdicts +import os + +basedir=os.path.join(os.getenv('FCCDICTSDIR', deffccdicts), '') + "yaml/FCCee/spring2021/IDEA/" +outdir="outputs/FCCee/coffea/" + +import multiprocessing +NUM_CPUS = int(multiprocessing.cpu_count()-2) + +process_list=['p8_ee_Zuds_ecm91'] + +#Very low fraction chosen to indicate single file +fraction=0.000001 +#fraction=0.0001 + +import config.runDataFrame as rdf +myana=rdf.runDataFrame(basedir,process_list) +myana.run(ncpu=NUM_CPUS,fraction=fraction,outDir=outdir) diff --git a/examples/FCCee/ghostFlavour/analysis.py b/examples/FCCee/ghostFlavour/analysis.py new file mode 100644 index 0000000000..0ef6618b38 --- /dev/null +++ b/examples/FCCee/ghostFlavour/analysis.py @@ -0,0 +1,133 @@ +#Mandatory: List of processes +processList = { + 'p8_ee_Zbb_ecm91':{'fraction':0.000000000001, 'chunks':1}, #Run 50% of the statistics in two files named /p8_ee_WW_ecm240/chunk.root +} + +#Mandatory: Production tag when running over EDM4Hep centrally produced events, this points to the yaml files for getting sample statistics +prodTag = "FCCee/spring2021/IDEA/" + +#Optional: output directory, default is local running directory +outputDir = "outputs/FCCee/ghostFlavour" + +#Optional: analysisName, default is "" +#analysisName = "My Analysis" + +#Optional: ncpus, default is 4 +#nCPUS = 8 +nCPUS = 1 + +#Optional running on HTCondor, default is False +#runBatch = False + +#Optional batch queue name when running on HTCondor, default is workday +#batchQueue = "longlunch" + +#Optional computing account when running on HTCondor, default is group_u_FCC.local_gen +#compGroup = "group_u_FCC.local_gen" + +#Optional test file +#testFile ="root://eospublic.cern.ch//eos/experiment/fcc/ee/generation/DelphesEvents/spring2021/IDEA/p8_ee_ZH_ecm240/events_101027117.root" + +#nevents=10 + +#Mandatory: RDFanalysis class where the use defines the operations on the TTree +class RDFanalysis(): + + #__________________________________________________________ + #Mandatory: analysers funtion to define the analysers to process, please make sure you return the last dataframe, in this example it is df2 + def analysers(df): + df2 = ( + df + #alias for dealing with # in python + .Alias("Particle0", "Particle#0.index") + .Alias("Particle1", "Particle#1.index") + .Alias("Jet3","Jet#3.index") + + .Define("MC_px", "MCParticle::get_px(Particle)") + .Define("MC_py", "MCParticle::get_py(Particle)") + .Define("MC_pz", "MCParticle::get_pz(Particle)") + .Define("MC_p", "MCParticle::get_p(Particle)") + .Define("MC_e", "MCParticle::get_e(Particle)") + .Define("MC_m", "MCParticle::get_mass(Particle)") + .Define("MC_theta", "MCParticle::get_theta(Particle)") + .Define("MC_pdg", "MCParticle::get_pdg(Particle)") + .Define("MC_status", "MCParticle::get_genStatus(Particle)") + + + #define the RP px, py, pz and e + .Define("RP_px", "ReconstructedParticle::get_px(ReconstructedParticles)") + .Define("RP_py", "ReconstructedParticle::get_py(ReconstructedParticles)") + .Define("RP_pz", "ReconstructedParticle::get_pz(ReconstructedParticles)") + .Define("RP_m", "ReconstructedParticle::get_mass(ReconstructedParticles)") + + ################ + #Jet Clustering# + ################ + + #build pseudo jets with the RP, using the interface that takes px,py,pz,m for better + #handling of rounding errors + .Define("pseudo_jets", "JetClusteringUtils::set_pseudoJets_xyzm(RP_px, RP_py, RP_pz, RP_m)") + + #EE-KT ALGORITHM + #run jet clustering with all MC particles. ee_kt_algorithm, exclusive clustering, exactly 2 jets, E-scheme + .Define("FCCAnalysesJets_ee_kt", "JetClustering::clustering_ee_kt(2, 2, 1, 0)(pseudo_jets)") + + #get the jets out of the structure + .Define("jets_ee_kt", "JetClusteringUtils::get_pseudoJets(FCCAnalysesJets_ee_kt)") + + #get the jet constituents out of the structure + .Define("jetconstituents_ee_kt", "JetClusteringUtils::get_constituents(FCCAnalysesJets_ee_kt)") + + #get some jet variables + .Define("jets_ee_kt_e", "JetClusteringUtils::get_e(jets_ee_kt)") + .Define("jets_ee_kt_px", "JetClusteringUtils::get_px(jets_ee_kt)") + .Define("jets_ee_kt_py", "JetClusteringUtils::get_py(jets_ee_kt)") + .Define("jets_ee_kt_pz", "JetClusteringUtils::get_pz(jets_ee_kt)") + .Define("jets_ee_kt_flavour", "JetTaggingUtils::get_flavour(jets_ee_kt, Particle)") + + ############### + #Ghost Flavour# + ############### + + # First the ghost indices are found using the find_ghosts function which returns an int RVec + .Define("ghost_MCindices", "JetTaggingUtils::find_ghosts(Particle, Particle1)") + + # Then the flavour is "set" by writing the ghost flavour to the jets.flavour field defined in the FCCAnalysesJet. This encompases rescaling of ghosts, their appending to the + # pseudojet set, the reclustering, and the kinematic check that the reclustered ghost jets have the same p_i as the original jet set. + .Define("FCCAnalysesJets_wflavour", "JetTaggingUtils::set_flavour(Particle, ghost_MCindices, FCCAnalysesJets_ee_kt, pseudo_jets)") + + # The flavour can simply be read-off from the jets.flavour field, returning and int RVec with the jet flavours + .Define("jet_flavour", "JetTaggingUtils::get_flavour(FCCAnalysesJets_wflavour)") + + # Or (as most might prefer) the above steps can be combined by overloading and providing all relevant parameters to get_flavour(...) + .Define("jet_flavour1", "JetTaggingUtils::get_flavour(Particle, Particle1, FCCAnalysesJets_ee_kt, pseudo_jets)") + + ) + return df2 + + #__________________________________________________________ + #Mandatory: output function, please make sure you return the branchlist as a python list + def output(): + branchList = [ + "MC_px", + "MC_py", + "MC_pz", + "MC_p", + "MC_e", + "MC_theta", + "MC_pdg", + "MC_status", + + "jets_ee_kt_e", + "jets_ee_kt_px", + "jets_ee_kt_py", + "jets_ee_kt_pz", + "jets_ee_kt_flavour", + "jetconstituents_ee_kt", + + + "ghost_MCindices", + "jet_flavour", + "jet_flavour1", + ] + return branchList