Skip to content

Commit

Permalink
merging
Browse files: browse the repository at this point in the history
  • Loading branch information
zbilodea committed Apr 2, 2024
1 parent 2cd942a commit da7e99e
Show file tree
Hide file tree
Showing 4 changed files with 46 additions and 49 deletions.
7 changes: 0 additions & 7 deletions src/hepconvert/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -513,13 +513,6 @@ def root_to_parquet(
expressions=None,
force=False,
step_size="100 MB",
tree=None,
drop_branches=None,
keep_branches=None,
cut=None,
expressions=None,
force=False,
step_size="100 MB",
list_to32=False,
string_to32=True,
bytestring_to32=True,
Expand Down
4 changes: 2 additions & 2 deletions src/hepconvert/copy_root.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,15 +18,15 @@ def copy_root(
*,
keep_branches=None,
drop_branches=None,
# add_branches=None, #TO-DO: add functionality for this, just specify about the counter issue
# add_branches=None, #TODO: add functionality for this, just specify about the counter issue?
keep_trees=None,
drop_trees=None,
cut=None,
expressions=None,
progress_bar=None,
force=False,
fieldname_separator="_",
# fix_duplicate_counters=False, #TO-DO: ask about this?
# fix_duplicate_counters=False, #TODO: ask about this?
title="",
field_name=lambda outer, inner: inner if outer == "" else outer + "_" + inner,
initial_basket_capacity=10,
Expand Down
33 changes: 24 additions & 9 deletions src/hepconvert/parquet_to_root.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,16 +13,17 @@ def parquet_to_root(
file,
*,
name="tree",
force=False,
branch_types=None,
progress_bar=False,
append=False,
title="",
field_name=lambda outer, inner: inner if outer == "" else outer + "_" + inner,
initial_basket_capacity=10,
counter_name=lambda counted: "n" + counted,
resize_factor=10.0,
compression="zlib",
compression="ZLIB",
compression_level=1,
force=True,
):
"""Converts a Parquet file into a ROOT file. Data is stored in one TTree, which has a name defined by argument ``name``.
Expand Down Expand Up @@ -84,7 +85,27 @@ def parquet_to_root(
raise ValueError(msg)
path = Path(destination)
if Path.is_file(path) and not force:
raise FileExistsError
msg = f"File {path} already exists. To overwrite it, set force=True."
raise FileExistsError(msg)
if append:
if Path.is_file(path):
out_file = uproot.update(
destination,
compression=uproot.compression.Compression.from_code_pair(
compression_code, compression_level
),
)
else:
msg = "Cannot append to a non-existent file."
raise FileNotFoundError(msg)

else:
out_file = uproot.recreate(
destination,
compression=uproot.compression.Compression.from_code_pair(
compression_code, compression_level
),
)
metadata = ak.metadata_from_parquet(file)
if progress_bar:
if progress_bar is True:
Expand All @@ -93,12 +114,6 @@ def parquet_to_root(

progress_bar = tqdm.tqdm(desc="Row-groups written")
progress_bar.reset(number_of_items)
out_file = uproot.recreate(
destination,
compression=uproot.compression.Compression.from_code_pair(
compression_code, compression_level
),
)

chunk = ak.from_parquet(file, row_groups=[0])
if not branch_types:
Expand Down
51 changes: 20 additions & 31 deletions tests/test_add_histograms.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@

# ruff: noqa: PTH118


def test_simple(tmp_path):
gauss_1 = ROOT.TH1I("name", "title", 5, -4, 4)
gauss_1.FillRandom("gaus")
Expand Down Expand Up @@ -45,16 +44,7 @@ def test_simple(tmp_path):
h3 = uproot.from_pyroot(gauss_3)

destination = os.path.join(tmp_path, "destination.root")
hepconvert.add_histograms(
destination,
[
os.path.join(tmp_path, "file1.root"),
os.path.join(tmp_path, "file2.root"),
os.path.join(tmp_path, "file3.root"),
],
force=True,
progress_bar=True,
)
hepconvert.add_histograms(destination, [os.path.join(tmp_path, "file1.root"), os.path.join(tmp_path, "file2.root"), os.path.join(tmp_path, "file3.root")], force=True, progress_bar=True)
with uproot.open(destination) as file:
added = uproot.from_pyroot(
gauss_1 + gauss_2 + gauss_3
Expand All @@ -71,12 +61,12 @@ def test_simple(tmp_path):
).all


def mult_1D(tmp_path):
def mult_1D(tmp_path, file_paths):
gauss_1 = ROOT.TH1I("name1", "title", 5, -4, 4)
gauss_1.FillRandom("gaus")
gauss_1.Sumw2()
gauss_1.SetDirectory(0)
outHistFile = ROOT.TFile.Open(os.path.join(tmp_path, "file1.root"), "RECREATE")
outHistFile = ROOT.TFile.Open(file_paths[0], "RECREATE")
outHistFile.cd()
gauss_1.Write()
outHistFile.Close()
Expand All @@ -86,7 +76,7 @@ def mult_1D(tmp_path):
gauss_2.FillRandom("gaus")
gauss_2.Sumw2()
gauss_2.SetDirectory(0)
outHistFile = ROOT.TFile.Open(os.path.join(tmp_path, "file1.root"), "UPDATE")
outHistFile = ROOT.TFile.Open(file_paths[0], "UPDATE")
outHistFile.cd()
gauss_2.Write()
outHistFile.Close()
Expand All @@ -96,7 +86,7 @@ def mult_1D(tmp_path):
gauss_3.FillRandom("gaus")
gauss_3.Sumw2()
gauss_3.SetDirectory(0)
outHistFile = ROOT.TFile.Open(os.path.join(tmp_path, "file2.root"), "RECREATE")
outHistFile = ROOT.TFile.Open(file_paths[1], "RECREATE")
outHistFile.cd()
gauss_3.Write()
outHistFile.Close()
Expand All @@ -106,7 +96,7 @@ def mult_1D(tmp_path):
gauss_4.FillRandom("gaus")
gauss_4.Sumw2()
gauss_4.SetDirectory(0)
outHistFile = ROOT.TFile.Open(os.path.join(tmp_path, "file2.root"), "UPDATE")
outHistFile = ROOT.TFile.Open(file_paths[1], "UPDATE")
outHistFile.cd()
gauss_4.Write()
outHistFile.Close()
Expand All @@ -116,7 +106,7 @@ def mult_1D(tmp_path):
gauss_5.FillRandom("gaus")
gauss_5.Sumw2()
gauss_5.SetDirectory(0)
outHistFile = ROOT.TFile.Open(os.path.join(tmp_path, "file3.root"), "RECREATE")
outHistFile = ROOT.TFile.Open(file_paths[2], "RECREATE")
outHistFile.cd()
gauss_5.Write()
outHistFile.Close()
Expand All @@ -126,23 +116,14 @@ def mult_1D(tmp_path):
gauss_6.FillRandom("gaus")
gauss_6.Sumw2()
gauss_6.SetDirectory(0)
outHistFile = ROOT.TFile.Open(os.path.join(tmp_path, "file3.root"), "UPDATE")
outHistFile = ROOT.TFile.Open(file_paths[2], "UPDATE")
outHistFile.cd()
gauss_6.Write()
outHistFile.Close()
h6 = uproot.from_pyroot(gauss_6)

destination = os.path.join(tmp_path, "destination.root")
hepconvert.add_histograms(
destination,
[
os.path.join(tmp_path, "file1.root"),
os.path.join(tmp_path, "file2.root"),
os.path.join(tmp_path, "file3.root"),
],
force=True,
same_names=False,
)
hepconvert.add_histograms(destination, file_paths, force=True, same_names=False)

with uproot.open(destination) as file:
added = uproot.from_pyroot(
Expand Down Expand Up @@ -507,7 +488,9 @@ def simple_2D(tmp_path):
for i in range(len(data2)):
for j in range(len(data2[0])):
h2.Fill(i, j, data2[i][j])
outHistFile = ROOT.TFile.Open(os.path.join(tmp_path, "file2dim2.root"), "RECREATE")
outHistFile = ROOT.TFile.Open(
os.path.join(tmp_path, "file2dim2.root"), "RECREATE"
)
outHistFile.cd()
h2.Write()
outHistFile.Close()
Expand All @@ -528,7 +511,9 @@ def simple_2D(tmp_path):
for j in range(len(data1[0])):
h1.Fill(i, j, data1[i][j])

outHistFile = ROOT.TFile.Open(os.path.join(tmp_path, "file1dim2.root"), "RECREATE")
outHistFile = ROOT.TFile.Open(
os.path.join(tmp_path, "file1dim2.root"), "RECREATE"
)
outHistFile.cd()
h1.Write()
outHistFile.Close()
Expand All @@ -543,7 +528,9 @@ def simple_2D(tmp_path):
force=True,
)

with uproot.open(os.path.join(tmp_path, "place2.root")) as file:
with uproot.open(
os.path.join(tmp_path, "place2.root")
) as file:
assert file["name"].member("fN") == h1.member("fN")
assert file["name"].member("fTsumw") == h1.member("fTsumw") + h2.member(
"fTsumw"
Expand Down Expand Up @@ -606,3 +593,5 @@ def break_bins(tmp_path):
],
force=True,
)


0 comments on commit da7e99e

Please sign in to comment.