From f5e127a801e6563155da3dce522eec5eb3095f67 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Zo=C3=AB=20Bilodeau?= <70441641+zbilodea@users.noreply.github.com> Date: Wed, 3 Jul 2024 10:56:56 +0200 Subject: [PATCH 1/2] fixed branch slimming (#102) --- src/hepconvert/copy_root.py | 17 +++++------------ tests/test_copy_root.py | 4 +++- 2 files changed, 8 insertions(+), 13 deletions(-) diff --git a/src/hepconvert/copy_root.py b/src/hepconvert/copy_root.py index b0e1bac..ecd5bfa 100644 --- a/src/hepconvert/copy_root.py +++ b/src/hepconvert/copy_root.py @@ -221,7 +221,7 @@ def copy_root( tqdm = _utils.check_tqdm() progress_bar = tqdm.tqdm(desc="Trees copied") progress_bar.reset(total=number_of_items) - for t in trees: + for t in trees: # pylint: disable=too-many-nested-blocks tree = f[t] count_branches = get_counter_branches(tree) kb = filter_branches(tree, keep_branches, drop_branches, count_branches) @@ -251,19 +251,12 @@ def copy_root( ) } ) - for key in group: - if key in kb: - del chunk[key] + for key in group: + if key in kb: + del chunk[key] if first: first = False - if drop_branches: - branch_types = { - name: array.type - for name, array in chunk.items() - if name not in drop_branches - } - else: - branch_types = {name: array.type for name, array in chunk.items()} + branch_types = {name: array.type for name, array in chunk.items()} of.mktree( tree.name, branch_types, diff --git a/tests/test_copy_root.py b/tests/test_copy_root.py index 623f2e0..2f15b5a 100644 --- a/tests/test_copy_root.py +++ b/tests/test_copy_root.py @@ -66,13 +66,14 @@ def test_keep_branches(tmp_path): hepconvert.copy_root( Path(tmp_path) / "drop_branches.root", skhep_testdata.data_path("uproot-HZZ.root"), - drop_branches=["Jet_*", "MClepton_*"], + keep_branches="MClepton_*", counter_name=lambda counted: "N" + counted, force=True, ) original = uproot.open(skhep_testdata.data_path("uproot-HZZ.root")) file = uproot.open(Path(tmp_path) / "drop_branches.root") + file["events"].show() for key in original["events"].keys(): if key.startswith("MClepton_"): assert key in file["events"].keys() @@ -81,6 +82,7 @@ def test_keep_branches(tmp_path): ) else: assert key not in file["events"].keys() + file.close() def test_hepdata_example(tmp_path): From 1520c2295a274e5e84f607ea10b6bcd90f2cbf3a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Zo=C3=AB=20Bilodeau?= <70441641+zbilodea@users.noreply.github.com> Date: Wed, 3 Jul 2024 12:13:30 +0200 Subject: [PATCH 2/2] fix: copy_root when selecting size with entry numbers, losing 100 entries (#103) * fixed branch slimming * linter * Another small fix with copy_root --- src/hepconvert/copy_root.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/hepconvert/copy_root.py b/src/hepconvert/copy_root.py index ecd5bfa..80637c3 100644 --- a/src/hepconvert/copy_root.py +++ b/src/hepconvert/copy_root.py @@ -266,7 +266,10 @@ def copy_root( initial_basket_capacity=initial_basket_capacity, resize_factor=resize_factor, ) - + try: + of[tree.name].extend(chunk) + except AssertionError: + msg = "Are the branch-names correct?" else: try: of[tree.name].extend(chunk)