From da7e99ecfb7b20a438bb885ded228a2770c9beae Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Zo=C3=AB=20Bilodeau?= Date: Tue, 2 Apr 2024 09:43:49 +0200 Subject: [PATCH] merging --- src/hepconvert/__main__.py | 7 ----- src/hepconvert/copy_root.py | 4 +-- src/hepconvert/parquet_to_root.py | 33 ++++++++++++++------ tests/test_add_histograms.py | 51 ++++++++++++------------------- 4 files changed, 46 insertions(+), 49 deletions(-) diff --git a/src/hepconvert/__main__.py b/src/hepconvert/__main__.py index bf526f7..2a19d8b 100644 --- a/src/hepconvert/__main__.py +++ b/src/hepconvert/__main__.py @@ -513,13 +513,6 @@ def root_to_parquet( expressions=None, force=False, step_size="100 MB", - tree=None, - drop_branches=None, - keep_branches=None, - cut=None, - expressions=None, - force=False, - step_size="100 MB", list_to32=False, string_to32=True, bytestring_to32=True, diff --git a/src/hepconvert/copy_root.py b/src/hepconvert/copy_root.py index 1ff840a..252cd63 100644 --- a/src/hepconvert/copy_root.py +++ b/src/hepconvert/copy_root.py @@ -18,7 +18,7 @@ def copy_root( *, keep_branches=None, drop_branches=None, - # add_branches=None, #TO-DO: add functionality for this, just specify about the counter issue + # add_branches=None, #TODO: add functionality for this, just specify about the counter issue? keep_trees=None, drop_trees=None, cut=None, @@ -26,7 +26,7 @@ def copy_root( progress_bar=None, force=False, fieldname_separator="_", - # fix_duplicate_counters=False, #TO-DO: ask about this? + # fix_duplicate_counters=False, #TODO: ask about this? title="", field_name=lambda outer, inner: inner if outer == "" else outer + "_" + inner, initial_basket_capacity=10, diff --git a/src/hepconvert/parquet_to_root.py b/src/hepconvert/parquet_to_root.py index 0c26927..61492ce 100644 --- a/src/hepconvert/parquet_to_root.py +++ b/src/hepconvert/parquet_to_root.py @@ -13,16 +13,17 @@ def parquet_to_root( file, *, name="tree", + force=False, branch_types=None, progress_bar=False, + append=False, title="", field_name=lambda outer, inner: inner if outer == "" else outer + "_" + inner, initial_basket_capacity=10, counter_name=lambda counted: "n" + counted, resize_factor=10.0, - compression="zlib", + compression="ZLIB", compression_level=1, - force=True, ): """Converts a Parquet file into a ROOT file. Data is stored in one TTree, which has a name defined by argument ``name``. @@ -84,7 +85,27 @@ def parquet_to_root( raise ValueError(msg) path = Path(destination) if Path.is_file(path) and not force: - raise FileExistsError + msg = f"File {path} already exists. To overwrite it, set force=True." + raise FileExistsError(msg) + if append: + if Path.is_file(path): + out_file = uproot.update( + destination, + compression=uproot.compression.Compression.from_code_pair( + compression_code, compression_level + ), + ) + else: + msg = "Cannot append to a non-existent file." + raise FileNotFoundError(msg) + + else: + out_file = uproot.recreate( + destination, + compression=uproot.compression.Compression.from_code_pair( + compression_code, compression_level + ), + ) metadata = ak.metadata_from_parquet(file) if progress_bar: if progress_bar is True: @@ -93,12 +114,6 @@ def parquet_to_root( progress_bar = tqdm.tqdm(desc="Row-groups written") progress_bar.reset(number_of_items) - out_file = uproot.recreate( - destination, - compression=uproot.compression.Compression.from_code_pair( - compression_code, compression_level - ), - ) chunk = ak.from_parquet(file, row_groups=[0]) if not branch_types: diff --git a/tests/test_add_histograms.py b/tests/test_add_histograms.py index 01cdedf..b30c051 100644 --- a/tests/test_add_histograms.py +++ b/tests/test_add_histograms.py @@ -12,7 +12,6 @@ # ruff: noqa: PTH118 - def test_simple(tmp_path): gauss_1 = ROOT.TH1I("name", "title", 5, -4, 4) gauss_1.FillRandom("gaus") @@ -45,16 +44,7 @@ def test_simple(tmp_path): h3 = uproot.from_pyroot(gauss_3) destination = os.path.join(tmp_path, "destination.root") - hepconvert.add_histograms( - destination, - [ - os.path.join(tmp_path, "file1.root"), - os.path.join(tmp_path, "file2.root"), - os.path.join(tmp_path, "file3.root"), - ], - force=True, - progress_bar=True, - ) + hepconvert.add_histograms(destination, [os.path.join(tmp_path, "file1.root"), os.path.join(tmp_path, "file2.root"), os.path.join(tmp_path, "file3.root")], force=True, progress_bar=True) with uproot.open(destination) as file: added = uproot.from_pyroot( gauss_1 + gauss_2 + gauss_3 @@ -71,12 +61,12 @@ def test_simple(tmp_path): ).all -def mult_1D(tmp_path): +def mult_1D(tmp_path, file_paths): gauss_1 = ROOT.TH1I("name1", "title", 5, -4, 4) gauss_1.FillRandom("gaus") gauss_1.Sumw2() gauss_1.SetDirectory(0) - outHistFile = ROOT.TFile.Open(os.path.join(tmp_path, "file1.root"), "RECREATE") + outHistFile = ROOT.TFile.Open(file_paths[0], "RECREATE") outHistFile.cd() gauss_1.Write() outHistFile.Close() @@ -86,7 +76,7 @@ def mult_1D(tmp_path): gauss_2.FillRandom("gaus") gauss_2.Sumw2() gauss_2.SetDirectory(0) - outHistFile = ROOT.TFile.Open(os.path.join(tmp_path, "file1.root"), "UPDATE") + outHistFile = ROOT.TFile.Open(file_paths[0], "UPDATE") outHistFile.cd() gauss_2.Write() outHistFile.Close() @@ -96,7 +86,7 @@ def mult_1D(tmp_path): gauss_3.FillRandom("gaus") gauss_3.Sumw2() gauss_3.SetDirectory(0) - outHistFile = ROOT.TFile.Open(os.path.join(tmp_path, "file2.root"), "RECREATE") + outHistFile = ROOT.TFile.Open(file_paths[1], "RECREATE") outHistFile.cd() gauss_3.Write() outHistFile.Close() @@ -106,7 +96,7 @@ def mult_1D(tmp_path): gauss_4.FillRandom("gaus") gauss_4.Sumw2() gauss_4.SetDirectory(0) - outHistFile = ROOT.TFile.Open(os.path.join(tmp_path, "file2.root"), "UPDATE") + outHistFile = ROOT.TFile.Open(file_paths[1], "UPDATE") outHistFile.cd() gauss_4.Write() outHistFile.Close() @@ -116,7 +106,7 @@ def mult_1D(tmp_path): gauss_5.FillRandom("gaus") gauss_5.Sumw2() gauss_5.SetDirectory(0) - outHistFile = ROOT.TFile.Open(os.path.join(tmp_path, "file3.root"), "RECREATE") + outHistFile = ROOT.TFile.Open(file_paths[2], "RECREATE") outHistFile.cd() gauss_5.Write() outHistFile.Close() @@ -126,23 +116,14 @@ def mult_1D(tmp_path): gauss_6.FillRandom("gaus") gauss_6.Sumw2() gauss_6.SetDirectory(0) - outHistFile = ROOT.TFile.Open(os.path.join(tmp_path, "file3.root"), "UPDATE") + outHistFile = ROOT.TFile.Open(file_paths[2], "UPDATE") outHistFile.cd() gauss_6.Write() outHistFile.Close() h6 = uproot.from_pyroot(gauss_6) destination = os.path.join(tmp_path, "destination.root") - hepconvert.add_histograms( - destination, - [ - os.path.join(tmp_path, "file1.root"), - os.path.join(tmp_path, "file2.root"), - os.path.join(tmp_path, "file3.root"), - ], - force=True, - same_names=False, - ) + hepconvert.add_histograms(destination, file_paths, force=True, same_names=False) with uproot.open(destination) as file: added = uproot.from_pyroot( @@ -507,7 +488,9 @@ def simple_2D(tmp_path): for i in range(len(data2)): for j in range(len(data2[0])): h2.Fill(i, j, data2[i][j]) - outHistFile = ROOT.TFile.Open(os.path.join(tmp_path, "file2dim2.root"), "RECREATE") + outHistFile = ROOT.TFile.Open( + os.path.join(tmp_path, "file2dim2.root"), "RECREATE" + ) outHistFile.cd() h2.Write() outHistFile.Close() @@ -528,7 +511,9 @@ def simple_2D(tmp_path): for j in range(len(data1[0])): h1.Fill(i, j, data1[i][j]) - outHistFile = ROOT.TFile.Open(os.path.join(tmp_path, "file1dim2.root"), "RECREATE") + outHistFile = ROOT.TFile.Open( + os.path.join(tmp_path, "file1dim2.root"), "RECREATE" + ) outHistFile.cd() h1.Write() outHistFile.Close() @@ -543,7 +528,9 @@ def simple_2D(tmp_path): force=True, ) - with uproot.open(os.path.join(tmp_path, "place2.root")) as file: + with uproot.open( + os.path.join(tmp_path, "place2.root") + ) as file: assert file["name"].member("fN") == h1.member("fN") assert file["name"].member("fTsumw") == h1.member("fTsumw") + h2.member( "fTsumw" @@ -606,3 +593,5 @@ def break_bins(tmp_path): ], force=True, ) + +