diff --git a/src/hepconvert/__main__.py b/src/hepconvert/__main__.py index 9d635d2..500f5d2 100644 --- a/src/hepconvert/__main__.py +++ b/src/hepconvert/__main__.py @@ -42,7 +42,7 @@ def main() -> None: @click.option( "-f", "--force", - default=False, + is_flag=True, help="If True, overwrites destination file if it already exists.", ) def parquet_to_root( @@ -59,7 +59,7 @@ def parquet_to_root( resize_factor=10.0, compression="zlib", compression_level=1, - force=False, + force, ): """ Convert Parquet file to ROOT file. @@ -111,7 +111,7 @@ def parquet_to_root( @click.option( "-f", "--force", - default=False, + is_flag=True, help="If True, overwrites destination file if it already exists.", ) def copy_root( @@ -125,7 +125,7 @@ def copy_root( cut=None, expressions=None, progress_bar=None, - force=False, + force, title="", field_name=lambda outer, inner: inner if outer == "" else outer + "_" + inner, initial_basket_capacity=10, @@ -168,7 +168,7 @@ def copy_root( @click.option( "-f", "--force", - default=False, + is_flag=True, help="Overwrite destination file if it already exists", ) @click.option("--progress-bar", default=None, type=bool, required=False) @@ -203,7 +203,7 @@ def add( files, *, progress_bar=False, - force=False, + force, append=False, compression="zlib", compression_level=1, @@ -257,7 +257,7 @@ def add( @click.option("--cut", default=None, type=str or list, required=False) @click.option("--expressions", default=None, type=str or list, required=False) @click.option( - "--force", default=True, help="Overwrite destination file if it already exists" + "--force", is_flag=True, help="Overwrite destination file if it already exists" ) @click.option("--append", default=False, help="Append histograms to an existing file") @click.option( @@ -293,7 +293,7 @@ def merge_root( resize_factor=10.0, counter_name=lambda counted: "n" + counted, step_size="100 MB", - force=True, + force, append=False, compression="LZ4", compression_level=1, @@ -342,7 +342,7 @@ def merge_root( @click.option( "-f", "--force", - default=False, + is_flag=True, type=bool, help="If a file already exists at specified path, it gets replaced", ) @@ -503,7 +503,7 @@ def root_to_parquet( parquet_extra_options=None, storage_options=None, tree=None, - force=False, + force, step_size=100, ): """ diff --git a/src/hepconvert/histogram_adding.py b/src/hepconvert/histogram_adding.py index 1a9fe47..3cf8ae6 100644 --- a/src/hepconvert/histogram_adding.py +++ b/src/hepconvert/histogram_adding.py @@ -8,7 +8,7 @@ from hepconvert import _utils -def _hadd_1d(destination, file, key, first, *, n_key=None): +def _hadd_1d(summed_hists, in_file, key, first, *, n_key=None): """Supporting function for add_histograms. :param destination: Name of the output file or file path. @@ -22,13 +22,10 @@ def _hadd_1d(destination, file, key, first, *, n_key=None): :type first: bool """ try: - hist = file[key] if n_key is None else file[n_key] + hist = in_file[key] if n_key is None else in_file[n_key] except ValueError: - msg = f"Key missing from {file}" + msg = f"Key missing from {in_file.file_path}" raise ValueError(msg) from None - # if file[key].classname == "TProfile": - # return TProfile_1d(destination, file, key, first, n_key=n_key) - outfile = uproot.open(destination) if first: member_data = np.array( [ @@ -44,17 +41,16 @@ def _hadd_1d(destination, file, key, first, *, n_key=None): hist.member("fTitle"), hist.values(flow=True), *member_data, - hist.variances(flow=True), + hist.variances(flow=False), uproot.writing.identify.to_TAxis( "fXaxis", "", - hist.member("fN"), + hist.member("fXaxis").member("fNbins"), hist.axis(axis="x").low, hist.axis(axis="x").high, - fXbins=hist.member("fXaxis").edges(flow=True), ), ) - if hist.member("fN") == outfile[key].member("fN"): + if hist.member("fN") == summed_hists[key].member("fN"): member_data = np.array( [ hist.member("fEntries"), @@ -64,42 +60,38 @@ def _hadd_1d(destination, file, key, first, *, n_key=None): hist.member("fTsumwx2"), ] ) - h_sum = uproot.writing.identify.to_TH1x( + return uproot.writing.identify.to_TH1x( hist.member("fName"), hist.member("fTitle"), - outfile[key].values(flow=True) + hist.values(flow=True), + summed_hists[key].values(flow=True) + hist.values(flow=True), *np.add( np.array( [ - outfile[key].member("fEntries"), - outfile[key].member("fTsumw"), - outfile[key].member("fTsumw2"), - outfile[key].member("fTsumwx"), - outfile[key].member("fTsumwx2"), + summed_hists[key].member("fEntries"), + summed_hists[key].member("fTsumw"), + summed_hists[key].member("fTsumw2"), + summed_hists[key].member("fTsumwx"), + summed_hists[key].member("fTsumwx2"), ] ), member_data, ), - outfile[key].variances(flow=True) + hist.variances(flow=True), + summed_hists[key].variances(flow=False) + hist.variances(flow=False), uproot.writing.identify.to_TAxis( "fXaxis", "", - hist.member("fN"), + hist.member("fXaxis").member("fNbins"), hist.axis(axis="x").low, hist.axis(axis="x").high, - fXbins=hist.member("fXaxis").edges(flow=True), ), ) - outfile.close() - return h_sum - - msg = f"Bins must be the same for histograms to be added, not {outfile[key].member('fN')} and {hist.member('fN')}" + msg = f"Bins must be the same for histograms to be added, not {summed_hists[key].member('fN')} and {hist.member('fN')}" raise ValueError( msg, ) from None -def _hadd_2d(destination, file, key, first, *, n_key=None): +def _hadd_2d(summed_hists, file, key, first, *, n_key=None): """Supporting function for add_histograms. :param destination: Name of the output file or file path. @@ -117,9 +109,6 @@ def _hadd_2d(destination, file, key, first, *, n_key=None): except ValueError: msg = f"Key missing from {file}" raise ValueError(msg) from None - # if file[key].classname == "TProfile2D": - # return TProfile_2d(destination, file, key, first, n_key=n_key) - outfile = uproot.open(destination) if first: member_data = np.array( [ @@ -138,14 +127,13 @@ def _hadd_2d(destination, file, key, first, *, n_key=None): hist.member("fTitle"), np.ravel(hist.values(flow=True), order="C"), *member_data, - np.ravel(hist.variances(flow=True), order="C"), + np.ravel(hist.variances(flow=False), order="C"), uproot.writing.identify.to_TAxis( "fXaxis", "", hist.member("fXaxis").member("fNbins"), hist.axis(axis="x").low, hist.axis(axis="x").high, - fXbins=hist.member("fXaxis").edges(flow=True), ), uproot.writing.identify.to_TAxis( "fYaxis", @@ -155,7 +143,7 @@ def _hadd_2d(destination, file, key, first, *, n_key=None): hist.axis(axis="y").high, ), ) - if hist.member("fN") == outfile[key].member("fN"): + if hist.member("fN") == summed_hists[key].member("fN"): member_data = np.array( [ hist.member("fEntries"), @@ -168,28 +156,29 @@ def _hadd_2d(destination, file, key, first, *, n_key=None): hist.member("fTsumwxy"), ] ) - h_sum = uproot.writing.identify.to_TH2x( + return uproot.writing.identify.to_TH2x( hist.member("fName"), hist.member("fTitle"), - np.ravel(outfile[key].values(flow=True), order="C") + np.ravel(summed_hists[key].values(flow=True), order="C") + np.ravel(hist.values(flow=True), order="C"), *np.add( np.array( [ - outfile[key].member("fEntries"), - outfile[key].member("fTsumw"), - outfile[key].member("fTsumw2"), - outfile[key].member("fTsumwx"), - outfile[key].member("fTsumwx2"), - outfile[key].member("fTsumwy"), - outfile[key].member("fTsumwy2"), - outfile[key].member("fTsumwxy"), + summed_hists[key].member("fEntries"), + summed_hists[key].member("fTsumw"), + summed_hists[key].member("fTsumw2"), + summed_hists[key].member("fTsumwx"), + summed_hists[key].member("fTsumwx2"), + summed_hists[key].member("fTsumwy"), + summed_hists[key].member("fTsumwy2"), + summed_hists[key].member("fTsumwxy"), ] ), member_data, ), np.ravel( - outfile[key].variances(flow=True) + hist.variances(flow=True), order="C" + summed_hists[key].variances(flow=False) + hist.variances(flow=False), + order="C", ), uproot.writing.identify.to_TAxis( "fXaxis", @@ -197,7 +186,6 @@ def _hadd_2d(destination, file, key, first, *, n_key=None): hist.member("fXaxis").member("fNbins"), hist.axis(axis="x").low, hist.axis(axis="x").high, - fXbins=hist.member("fXaxis").edges(flow=True), ), uproot.writing.identify.to_TAxis( "fYaxis", @@ -207,16 +195,13 @@ def _hadd_2d(destination, file, key, first, *, n_key=None): hist.axis(axis="y").high, ), ) - outfile.close() - return h_sum - - msg = f"Bins must be the same for histograms to be added, not {outfile[key].member('fN')} and {hist.member('fN')}" + msg = f"Bins must be the same for histograms to be added, not {summed_hists[key].member('fN')} and {hist.member('fN')}" raise ValueError( msg, ) from None -def _hadd_3d(destination, file, key, first, *, n_key=None): +def _hadd_3d(summed_hists, file, key, first, *, n_key=None): """Supporting function for add_histograms. :param destination: Name of the output file or file path. @@ -234,9 +219,6 @@ def _hadd_3d(destination, file, key, first, *, n_key=None): except ValueError: msg = f"Key missing from {file}" raise ValueError(msg) from None - # if file[key].classname == "TProfile3D": - # return TProfile_3d(destination, file, key, first, n_key=n_key) - outfile = uproot.open(destination) if first: member_data = np.array( [ @@ -259,14 +241,13 @@ def _hadd_3d(destination, file, key, first, *, n_key=None): hist.member("fTitle"), np.ravel(hist.values(flow=True), order="C"), *member_data, - np.ravel(hist.variances(flow=True), order="C"), + np.ravel(hist.variances(flow=False), order="C"), uproot.writing.identify.to_TAxis( "fXaxis", "", hist.member("fXaxis").member("fNbins"), hist.axis(axis="x").low, hist.axis(axis="x").high, - fXbins=hist.member("fXaxis").edges(flow=True), ), uproot.writing.identify.to_TAxis( "fYaxis", @@ -283,7 +264,7 @@ def _hadd_3d(destination, file, key, first, *, n_key=None): hist.axis(axis="z").high, ), ) - if hist.member("fN") == outfile[key].member("fN"): + if hist.member("fN") == summed_hists[key].member("fN"): member_data = np.add( np.array( [ @@ -304,31 +285,31 @@ def _hadd_3d(destination, file, key, first, *, n_key=None): np.array( [ hist.member("fEntries"), - outfile[key].member("fTsumw"), - outfile[key].member("fTsumw2"), - outfile[key].member("fTsumwx"), - outfile[key].member("fTsumwx2"), - outfile[key].member("fTsumwy"), - outfile[key].member("fTsumwy2"), - outfile[key].member("fTsumwxy"), - outfile[key].member("fTsumwz"), - outfile[key].member("fTsumwz2"), - outfile[key].member("fTsumwxz"), - outfile[key].member("fTsumwyz"), + summed_hists[key].member("fTsumw"), + summed_hists[key].member("fTsumw2"), + summed_hists[key].member("fTsumwx"), + summed_hists[key].member("fTsumwx2"), + summed_hists[key].member("fTsumwy"), + summed_hists[key].member("fTsumwy2"), + summed_hists[key].member("fTsumwxy"), + summed_hists[key].member("fTsumwz"), + summed_hists[key].member("fTsumwz2"), + summed_hists[key].member("fTsumwxz"), + summed_hists[key].member("fTsumwyz"), ] ), ) - h_sum = uproot.writing.identify.to_TH3x( + return uproot.writing.identify.to_TH3x( hist.member("fName"), hist.member("fTitle"), np.ravel( - outfile[key].values(flow=True) + hist.values(flow=True), order="C" + summed_hists[key].values(flow=True) + hist.values(flow=True), order="C" ), *member_data, ( - np.ravel(outfile[key].variances(flow=True), order="C") + np.ravel(summed_hists[key].variances(flow=False), order="C") + np.ravel( - hist.variances(flow=True), + hist.variances(flow=False), order="C", ) ), @@ -338,7 +319,6 @@ def _hadd_3d(destination, file, key, first, *, n_key=None): hist.member("fXaxis").member("fNbins"), hist.axis(axis="x").low, hist.axis(axis="x").high, - fXbins=hist.member("fXaxis").edges(flow=True), ), uproot.writing.identify.to_TAxis( "fYaxis", @@ -355,10 +335,8 @@ def _hadd_3d(destination, file, key, first, *, n_key=None): hist.axis(axis="z").high, ), ) - outfile.close() - return h_sum - msg = f"Bins must be the same for histograms to be added, not {outfile[key].member('fN')} and {hist.member('fN')}" + msg = f"Bins must be the same for histograms to be added, not {summed_hists[key].member('fN')} and {hist.member('fN')}" raise ValueError( msg, ) from None @@ -441,9 +419,9 @@ def add_histograms( msg = "Cannot append to a new file. Either force or append can be true." raise ValueError(msg) if append: - file_out = uproot.update(destination) + out_file = uproot.update(destination) elif force: - file_out = uproot.recreate( + out_file = uproot.recreate( destination, compression=uproot.compression.Compression.from_code_pair( compression_code, compression_level @@ -454,7 +432,7 @@ def add_histograms( raise FileNotFoundError( "File %s" + destination + " not found. File must exist to append." ) - file_out = uproot.recreate( + out_file = uproot.recreate( destination, compression=uproot.compression.Compression.from_code_pair( compression_code, compression_level @@ -477,7 +455,6 @@ def add_histograms( if progress_bar is True: tqdm = _utils.check_tqdm() number_of_items = len(files) - file_bar = tqdm.tqdm(desc="Files added") hist_bar = tqdm.tqdm(desc="Histograms added") @@ -501,9 +478,10 @@ def add_histograms( keys = file.keys(filter_classname="TH[1|2|3][I|S|F|D|C]", cycle=False) first = True + hists = {} for input_file in files: try: - file = uproot.open(input_file) + in_file = uproot.open(input_file) except FileNotFoundError: if skip_bad_files: continue @@ -514,419 +492,51 @@ def add_histograms( hist_bar.reset(len(keys)) for key in keys: try: - file[key] + in_file[key] except ValueError: if not union: continue msg = "Union key filter error." raise ValueError(msg) from None - if len(file[key].axes) == 1: - h_sum = _hadd_1d(destination, file, key, first) + if len(in_file[key].axes) == 1: + h_sum = _hadd_1d(hists, in_file, key, first) - elif len(file[key].axes) == 2: - h_sum = _hadd_2d(destination, file, key, first) + elif len(in_file[key].axes) == 2: + h_sum = _hadd_2d(hists, in_file, key, first) else: - h_sum = _hadd_3d(destination, file, key, first) + h_sum = _hadd_3d(hists, in_file, key, first) if progress_bar: - file_bar.update(n=1) + hist_bar.update(n=1) + + if h_sum is not None: + hists[key] = h_sum else: - n_keys = file.keys(filter_classname="TH[1|2|3][I|S|F|D|C]", cycle=False) + n_keys = in_file.keys(filter_classname="TH[1|2|3][I|S|F|D|C]", cycle=False) if progress_bar: hist_bar.reset(len(n_keys)) for i, _value in enumerate(keys): - if len(file[n_keys[i]].axes) == 1: - h_sum = _hadd_1d(destination, file, keys[i], first, n_key=n_keys[i]) + if len(in_file[n_keys[i]].axes) == 1: + h_sum = _hadd_1d(out_file, in_file, keys[i], first, n_key=n_keys[i]) elif len(file[n_keys[i]].axes) == 2: - h_sum = _hadd_2d(destination, file, keys[i], first, n_key=n_keys[i]) + h_sum = _hadd_2d(out_file, in_file, keys[i], first, n_key=n_keys[i]) else: - h_sum = _hadd_3d(destination, file, keys[i], first, n_key=n_keys[i]) + h_sum = _hadd_3d(out_file, in_file, keys[i], first, n_key=n_keys[i]) if h_sum is not None: - file_out[keys[i]] = h_sum + out_file[keys[i]] = h_sum if progress_bar: hist_bar.update(n=1) if progress_bar: file_bar.update(n=1) first = False - file.close() - file_out.close() + in_file.close() + for key, h_sum in hists.items(): + out_file[key] = h_sum -def _tprofile_1d(destination, file, key, first, *, n_key=None): - """ - Args: - :param destination: Name of the output file or file path. - :type destination: path-like - :param file: ROOT file to read histogram from. - :type file: ReadOnlyDirectory - :key: key to reference histogram to be added. - :type key: str - :param first: if True, special case for first of a certain histogram - to be added to the new file. - :type first: str - """ - hist = file[key] if n_key is None else file[n_key] - outfile = uproot.open(destination) - if first: - member_data = np.array( - [ - hist.member("fEntries"), - hist.member("fTsumw"), - hist.member("fTsumw2"), - hist.member("fTsumwx"), - hist.member("fTsumwx2"), - hist.member("fTsumwy"), - hist.member("fTsumwy2"), - ] - ) - return uproot.writing.identify.to_TProfile( - hist.member("fName"), - hist.member("fTitle"), - hist.values(flow=True), - *member_data, - hist.member("fSumw2"), - hist.member("fBinEntries"), - hist.member("fBinSumw2"), - hist.variances(flow=True), - uproot.writing.identify.to_TAxis( - "fXaxis", - "", - hist.member("fN"), - hist.axis(axis="x").low, - hist.axis(axis="x").high, - fXbins=hist.member("fXaxis").edges(flow=True), - ), - ) - if hist.member("fN") == outfile[key].member("fN"): - member_data = np.array( - [ - hist.member("fEntries"), - hist.member("fTsumw"), - hist.member("fTsumw2"), - hist.member("fTsumwx"), - hist.member("fTsumwx2"), - hist.member("fTsumwy"), - hist.member("fTsumwy2"), - ] - ) - h_sum = uproot.writing.identify.to_TProfile( - hist.member("fName"), - hist.member("fTitle"), - outfile[key].values(flow=True) + hist.values(flow=True), - *np.add( - np.array( - [ - outfile[key].member("fEntries"), - outfile[key].member("fTsumw"), - outfile[key].member("fTsumw2"), - outfile[key].member("fTsumwx"), - outfile[key].member("fTsumwx2"), - outfile[key].member("fTsumwy"), - outfile[key].member("fTsumwy2"), - outfile[key].member("fSumw2"), - outfile[key].member("fBinEntries"), - outfile[key].member("fBinSumw2"), - ] - ), - member_data, - ), - outfile[key].member("fSumw2") + hist.member("fSumw2"), - outfile[key].member("fBinEntries") + hist.member("fBinEntries"), - outfile[key].member("fBinEntries") + hist.member("fBinSumw2"), - outfile[key].variances(flow=True) + hist.variances(flow=True), - uproot.writing.identify.to_TAxis( - "fXaxis", - "", - hist.member("fN"), - hist.axis(axis="x").low, - hist.axis(axis="x").high, - fXbins=hist.member("fXaxis").edges(flow=True), - ), - ) - outfile.close() - return h_sum - - msg = "Bins must be the same for histograms to be added, not " - raise ValueError( - msg, - hist.member("fN"), - " and ", - outfile[key].member("fN"), - ) from None - - -def _tprofile_2d(destination, file, key, first, *, n_key=None): - """ - Args: - :param destination: Name of the output file or file path. - :type destination: path-like - :param file: ROOT file to read histogram from. - :type file: ReadOnlyDirectory - :key: key to reference histogram to be added. - :type key: str - :param first: if True, special case for first of a certain histogram - to be added to the new file. - :type first: str - """ - outfile = uproot.open(destination) - hist = file[key] if n_key is None else file[n_key] - - if first: - member_data = np.array( - [ - hist.member("fEntries"), - hist.member("fTsumw"), - hist.member("fTsumw2"), - hist.member("fTsumwx"), - hist.member("fTsumwx2"), - hist.member("fTsumwy"), - hist.member("fTsumwy2"), - hist.member("fTsumwxy"), - hist.member("fTsumwz"), - hist.member("fTsumwz2"), - hist.member("fSumw2"), - hist.member("fBinEntries"), - hist.member("fBinSumw2"), - ] - ) - return uproot.writing.identify.to_TProfile2D( - hist.member("fName"), - hist.member("fTitle"), - np.ravel(hist.values(flow=True), order="C"), - *member_data, - np.ravel(hist.variances(flow=True), order="C"), - uproot.writing.identify.to_TAxis( - "fXaxis", - "", - hist.member("fXaxis").member("fNbins"), - hist.axis(axis="x").low, - hist.axis(axis="x").high, - fXbins=hist.member("fXaxis").edges(flow=True), - ), - uproot.writing.identify.to_TAxis( - "fYaxis", - "", - hist.member("fYaxis").member("fNbins"), - hist.axis(axis="y").low, - hist.axis(axis="y").high, - ), - ) - if hist.member("fN") == outfile[key].member("fN"): - member_data = np.array( - [ - hist.member("fEntries"), - hist.member("fTsumw"), - hist.member("fTsumw2"), - hist.member("fTsumwx"), - hist.member("fTsumwx2"), - hist.member("fTsumwy"), - hist.member("fTsumwy2"), - hist.member("fTsumwxy"), - hist.member("fTsumwz"), - hist.member("fTsumwz2"), - hist.member("fSumw2"), - hist.member("fBinEntries"), - hist.member("fBinSumw2"), - ] - ) - h_sum = uproot.writing.identify.to_TH2x( - hist.member("fName"), - hist.member("fTitle"), - np.ravel(outfile[key].values(flow=True), order="C") - + np.ravel(hist.values(flow=True), order="C"), - *np.add( - np.array( - [ - outfile[key].member("fEntries"), - outfile[key].member("fTsumw"), - outfile[key].member("fTsumw2"), - outfile[key].member("fTsumwx"), - outfile[key].member("fTsumwx2"), - outfile[key].member("fTsumwy"), - outfile[key].member("fTsumwy2"), - outfile[key].member("fTsumwxy"), - outfile[key].member("fTsumwz"), - outfile[key].member("fTsumwz2"), - outfile[key].member("fSumw2"), - outfile[key].member("fBinEntries"), - outfile[key].member("fBinSumw2"), - ] - ), - member_data, - ), - np.ravel( - outfile[key].variances(flow=True) + hist.variances(flow=True), order="C" - ), - uproot.writing.identify.to_TAxis( - "fXaxis", - "", - hist.member("fXaxis").member("fNbins"), - hist.axis(axis="x").low, - hist.axis(axis="x").high, - fXbins=hist.member("fXaxis").edges(flow=True), - ), - uproot.writing.identify.to_TAxis( - "fYaxis", - "", - hist.member("fYaxis").member("fNbins"), - hist.axis(axis="y").low, - hist.axis(axis="y").high, - ), - ) - outfile.close() - return h_sum - - msg = "Bins must be the same for histograms to be added, not " - raise ValueError( - msg, - hist.member("fN"), - " and ", - outfile[key].member("fN"), - ) from None - - -def _tprofile_3d(destination, file, key, first, *, n_key=None): - """ - Args: - :param destination: Name of the output file or file path. - :type destination: path-like - :param file: ROOT file to read histogram from. - :type file: ReadOnlyDirectory - :key: key to reference histogram to be added. - :type key: str - :param first: if True, special case for first of a certain histogram - to be added to the new file. - :type first: str - """ - outfile = uproot.open(destination) - hist = file[key] if n_key is None else file[n_key] - - if first: - member_data = np.array( - [ - hist.member("fEntries"), - hist.member("fTsumw"), - hist.member("fTsumw2"), - hist.member("fTsumwx"), - hist.member("fTsumwx2"), - hist.member("fTsumwy"), - hist.member("fTsumwy2"), - hist.member("fTsumwxy"), - hist.member("fTsumwz"), - hist.member("fTsumwz2"), - hist.member("fTsumwxz"), - hist.member("fTsumwxyz"), - hist.member("fTsumwt"), - hist.member("fTsumwt2"), - hist.member("fSumw2"), - hist.member("fBinEntries"), - hist.member("fBinSumw2"), - ] - ) - return uproot.writing.identify.to_TProfile2D( - hist.member("fName"), - hist.member("fTitle"), - np.ravel(hist.values(flow=True), order="C"), - *member_data, - np.ravel(hist.variances(flow=True), order="C"), - uproot.writing.identify.to_TAxis( - "fXaxis", - "", - hist.member("fXaxis").member("fNbins"), - hist.axis(axis="x").low, - hist.axis(axis="x").high, - fXbins=hist.member("fXaxis").edges(flow=True), - ), - uproot.writing.identify.to_TAxis( - "fYaxis", - "", - hist.member("fYaxis").member("fNbins"), - hist.axis(axis="y").low, - hist.axis(axis="y").high, - ), - ) - if hist.member("fN") == outfile[key].member("fN"): - member_data = np.array( - [ - hist.member("fEntries"), - hist.member("fTsumw"), - hist.member("fTsumw2"), - hist.member("fTsumwx"), - hist.member("fTsumwx2"), - hist.member("fTsumwy"), - hist.member("fTsumwy2"), - hist.member("fTsumwxy"), - hist.member("fTsumwz"), - hist.member("fTsumwz2"), - hist.member("fTsumwxz"), - hist.member("fTsumwxyz"), - hist.member("fTsumwt"), - hist.member("fTsumwt2"), - hist.member("fSumw2"), - hist.member("fBinEntries"), - hist.member("fBinSumw2"), - ] - ) - h_sum = uproot.writing.identify.to_TH2x( - hist.member("fName"), - hist.member("fTitle"), - np.ravel(outfile[key].values(flow=True), order="C") - + np.ravel(hist.values(flow=True), order="C"), - *np.add( - np.array( - [ - outfile[key].member("fEntries"), - outfile[key].member("fTsumw"), - outfile[key].member("fTsumw2"), - outfile[key].member("fTsumwx"), - outfile[key].member("fTsumwx2"), - outfile[key].member("fTsumwy"), - outfile[key].member("fTsumwy2"), - outfile[key].member("fTsumwxy"), - outfile[key].member("fTsumwz"), - outfile[key].member("fTsumwz2"), - outfile[key].member("fTsumwxz"), - outfile[key].member("fTsumwxyz"), - outfile[key].member("fTsumwt"), - outfile[key].member("fTsumwt2"), - outfile[key].member("fSumw2"), - outfile[key].member("fBinEntries"), - outfile[key].member("fBinSumw2"), - ] - ), - member_data, - ), - np.ravel( - outfile[key].variances(flow=True) + hist.variances(flow=True), order="C" - ), - uproot.writing.identify.to_TAxis( - "fXaxis", - "", - hist.member("fXaxis").member("fNbins"), - hist.axis(axis="x").low, - hist.axis(axis="x").high, - fXbins=hist.member("fXaxis").edges(flow=True), - ), - uproot.writing.identify.to_TAxis( - "fYaxis", - "", - hist.member("fYaxis").member("fNbins"), - hist.axis(axis="y").low, - hist.axis(axis="y").high, - ), - ) - outfile.close() - return h_sum - - msg = "Bins must be the same for histograms to be added, not " - raise ValueError( - msg, - hist.member("fN"), - " and ", - outfile[key].member("fN"), - ) from None + out_file.close() diff --git a/tests/test_add_histograms.py b/tests/test_add_histograms.py index 7730cf7..01cdedf 100644 --- a/tests/test_add_histograms.py +++ b/tests/test_add_histograms.py @@ -1,6 +1,6 @@ from __future__ import annotations -from pathlib import Path +import os import numpy as np import pytest @@ -10,80 +10,15 @@ ROOT = pytest.importorskip("ROOT") +# ruff: noqa: PTH118 -def write_root_file(hist, path): - outHistFile = ROOT.TFile.Open(path, "RECREATE") - outHistFile.cd() - hist.Write() - outHistFile.Close() - -def generate_1D_gaussian(): +def test_simple(tmp_path): gauss_1 = ROOT.TH1I("name", "title", 5, -4, 4) gauss_1.FillRandom("gaus") gauss_1.Sumw2() gauss_1.SetDirectory(0) - outHistFile = ROOT.TFile.Open("/hepconvert/tests/samples/hist1.root", "RECREATE") - outHistFile.cd() - gauss_1.Write() - outHistFile.Close() - gauss_1 = uproot.from_pyroot(gauss_1) - - gauss_2 = ROOT.TH1I("name", "title", 5, -4, 4) - gauss_2.FillRandom("gaus") - gauss_2.Sumw2() - gauss_2.SetDirectory(0) - outHistFile = ROOT.TFile.Open("hepconvert/tests/samples/hist2.root", "RECREATE") - outHistFile.cd() - gauss_2.Write() - outHistFile.Close() - gauss_2 = uproot.from_pyroot(gauss_2) - - gauss_3 = ROOT.TH1I("name", "title", 5, -4, 4) - gauss_3.FillRandom("gaus") - gauss_3.Sumw2() - gauss_3.SetDirectory(0) - outHistFile = ROOT.TFile.Open("hepconvert/tests/samples/hist3.root", "RECREATE") - outHistFile.cd() - gauss_3.Write() - outHistFile.Close() - gauss_3 = uproot.from_pyroot(gauss_3) - - return gauss_1, gauss_2, gauss_3 - - -def generate_1D_simple(): - h1 = ROOT.TH1F("name", "", 10, 0.0, 10.0) - data1 = [11.5, 12.0, 9.0, 8.1, 6.4, 6.32, 5.3, 3.0, 2.0, 1.0] - for i in range(len(data1)): - h1.Fill(i, data1[i]) - - outHistFile = ROOT.TFile.Open("hepconvert/tests/samples/file1dim1.root", "RECREATE") - outHistFile.cd() - h1.Write() - outHistFile.Close() - h1 = uproot.from_pyroot(h1) - - h2 = ROOT.TH1F("name", "", 10, 0.0, 10.0) - data2 = [21.5, 10.0, 9.0, 8.2, 6.8, 6.32, 5.3, 3.0, 2.0, 1.0] - - for i in range(len(data2)): - h2.Fill(i, data2[i]) - - outHistFile = ROOT.TFile.Open("tests/file2dim1.root", "RECREATE") - outHistFile.cd() - h2.Write() - outHistFile.Close() - h2 = uproot.from_pyroot(h2) - return h1, h2 - - -def test_simple(tmp_path, file_paths): - gauss_1 = ROOT.TH1I("name", "title", 5, -4, 4) - gauss_1.FillRandom("gaus") - gauss_1.Sumw2() - gauss_1.SetDirectory(0) - outHistFile = ROOT.TFile.Open(file_paths[0], "RECREATE") + outHistFile = ROOT.TFile.Open(os.path.join(tmp_path, "file1.root"), "RECREATE") outHistFile.cd() gauss_1.Write() outHistFile.Close() @@ -93,7 +28,7 @@ def test_simple(tmp_path, file_paths): gauss_2.FillRandom("gaus") gauss_2.Sumw2() gauss_2.SetDirectory(0) - outHistFile = ROOT.TFile.Open(file_paths[1], "RECREATE") + outHistFile = ROOT.TFile.Open(os.path.join(tmp_path, "file2.root"), "RECREATE") outHistFile.cd() gauss_2.Write() outHistFile.Close() @@ -103,15 +38,23 @@ def test_simple(tmp_path, file_paths): gauss_3.FillRandom("gaus") gauss_3.Sumw2() gauss_3.SetDirectory(0) - outHistFile = ROOT.TFile.Open(file_paths[2], "RECREATE") + outHistFile = ROOT.TFile.Open(os.path.join(tmp_path, "file3.root"), "RECREATE") outHistFile.cd() gauss_3.Write() outHistFile.Close() h3 = uproot.from_pyroot(gauss_3) - path = Path(tmp_path) - destination = path / "destination.root" - hepconvert.add_histograms(destination, file_paths, force=True, progress_bar=True) + destination = os.path.join(tmp_path, "destination.root") + hepconvert.add_histograms( + destination, + [ + os.path.join(tmp_path, "file1.root"), + os.path.join(tmp_path, "file2.root"), + os.path.join(tmp_path, "file3.root"), + ], + force=True, + progress_bar=True, + ) with uproot.open(destination) as file: added = uproot.from_pyroot( gauss_1 + gauss_2 + gauss_3 @@ -128,12 +71,12 @@ def test_simple(tmp_path, file_paths): ).all -def mult_1D(tmp_path, file_paths): +def mult_1D(tmp_path): gauss_1 = ROOT.TH1I("name1", "title", 5, -4, 4) gauss_1.FillRandom("gaus") gauss_1.Sumw2() gauss_1.SetDirectory(0) - outHistFile = ROOT.TFile.Open(file_paths[0], "RECREATE") + outHistFile = ROOT.TFile.Open(os.path.join(tmp_path, "file1.root"), "RECREATE") outHistFile.cd() gauss_1.Write() outHistFile.Close() @@ -143,7 +86,7 @@ def mult_1D(tmp_path, file_paths): gauss_2.FillRandom("gaus") gauss_2.Sumw2() gauss_2.SetDirectory(0) - outHistFile = ROOT.TFile.Open(file_paths[0], "UPDATE") + outHistFile = ROOT.TFile.Open(os.path.join(tmp_path, "file1.root"), "UPDATE") outHistFile.cd() gauss_2.Write() outHistFile.Close() @@ -153,7 +96,7 @@ def mult_1D(tmp_path, file_paths): gauss_3.FillRandom("gaus") gauss_3.Sumw2() gauss_3.SetDirectory(0) - outHistFile = ROOT.TFile.Open(file_paths[1], "RECREATE") + outHistFile = ROOT.TFile.Open(os.path.join(tmp_path, "file2.root"), "RECREATE") outHistFile.cd() gauss_3.Write() outHistFile.Close() @@ -163,7 +106,7 @@ def mult_1D(tmp_path, file_paths): gauss_4.FillRandom("gaus") gauss_4.Sumw2() gauss_4.SetDirectory(0) - outHistFile = ROOT.TFile.Open(file_paths[1], "UPDATE") + outHistFile = ROOT.TFile.Open(os.path.join(tmp_path, "file2.root"), "UPDATE") outHistFile.cd() gauss_4.Write() outHistFile.Close() @@ -173,7 +116,7 @@ def mult_1D(tmp_path, file_paths): gauss_5.FillRandom("gaus") gauss_5.Sumw2() gauss_5.SetDirectory(0) - outHistFile = ROOT.TFile.Open(file_paths[2], "RECREATE") + outHistFile = ROOT.TFile.Open(os.path.join(tmp_path, "file3.root"), "RECREATE") outHistFile.cd() gauss_5.Write() outHistFile.Close() @@ -183,15 +126,23 @@ def mult_1D(tmp_path, file_paths): gauss_6.FillRandom("gaus") gauss_6.Sumw2() gauss_6.SetDirectory(0) - outHistFile = ROOT.TFile.Open(file_paths[2], "UPDATE") + outHistFile = ROOT.TFile.Open(os.path.join(tmp_path, "file3.root"), "UPDATE") outHistFile.cd() gauss_6.Write() outHistFile.Close() h6 = uproot.from_pyroot(gauss_6) - path = Path(tmp_path) - destination = path / "destination.root" - hepconvert.add_histograms(destination, file_paths, force=True, same_names=False) + destination = os.path.join(tmp_path, "destination.root") + hepconvert.add_histograms( + destination, + [ + os.path.join(tmp_path, "file1.root"), + os.path.join(tmp_path, "file2.root"), + os.path.join(tmp_path, "file3.root"), + ], + force=True, + same_names=False, + ) with uproot.open(destination) as file: added = uproot.from_pyroot( @@ -214,11 +165,41 @@ def mult_1D(tmp_path, file_paths): ) + h6.member("fTsumw") -def test_3_glob(file_paths, tmp_path): - h1, h2, h3 = generate_1D_gaussian(file_paths) +def test_3_glob(tmp_path): + gauss_1 = ROOT.TH1I("name", "title", 5, -4, 4) + gauss_1.FillRandom("gaus") + gauss_1.Sumw2() + gauss_1.SetDirectory(0) + outHistFile = ROOT.TFile.Open(os.path.join(tmp_path, "hist1.root"), "RECREATE") + outHistFile.cd() + gauss_1.Write() + outHistFile.Close() + h1 = uproot.from_pyroot(gauss_1) + + gauss_2 = ROOT.TH1I("name", "title", 5, -4, 4) + gauss_2.FillRandom("gaus") + gauss_2.Sumw2() + gauss_2.SetDirectory(0) + outHistFile = ROOT.TFile.Open(os.path.join(tmp_path, "hist2.root"), "RECREATE") + outHistFile.cd() + gauss_2.Write() + outHistFile.Close() + h2 = uproot.from_pyroot(gauss_2) + + gauss_3 = ROOT.TH1I("name", "title", 5, -4, 4) + gauss_3.FillRandom("gaus") + gauss_3.Sumw2() + gauss_3.SetDirectory(0) + outHistFile = ROOT.TFile.Open(os.path.join(tmp_path, "hist3.root"), "RECREATE") + outHistFile.cd() + gauss_3.Write() + outHistFile.Close() + h3 = uproot.from_pyroot(gauss_3) hepconvert.add_histograms( - Path(tmp_path) / "place.root", Path(tmp_path) / "samples", force=True + os.path.join(tmp_path, "place.root"), + os.path.join(tmp_path, "samples"), + force=True, ) with uproot.open("tests/place.root") as file: @@ -235,14 +216,39 @@ def test_3_glob(file_paths, tmp_path): def simple_1dim_F(tmp_path): - h1, h2 = generate_1D_simple() + h1 = ROOT.TH1F("name", "", 10, 0.0, 10.0) + data1 = [11.5, 12.0, 9.0, 8.1, 6.4, 6.32, 5.3, 3.0, 2.0, 1.0] + for i in range(len(data1)): + h1.Fill(i, data1[i]) + + outHistFile = ROOT.TFile.Open(os.path.join(tmp_path, "file1dim1.root"), "RECREATE") + outHistFile.cd() + h1.Write() + outHistFile.Close() + h1 = uproot.from_pyroot(h1) + + h2 = ROOT.TH1F("name", "", 10, 0.0, 10.0) + data2 = [21.5, 10.0, 9.0, 8.2, 6.8, 6.32, 5.3, 3.0, 2.0, 1.0] + + for i in range(len(data2)): + h2.Fill(i, data2[i]) + + outHistFile = ROOT.TFile.Open(os.path.join(tmp_path, "file2dim1.root"), "RECREATE") + outHistFile.cd() + h2.Write() + outHistFile.Close() + h2 = uproot.from_pyroot(h2) + hepconvert.add_histograms( - Path(tmp_path) / "place2.root", - [Path(tmp_path) / "file1dim1.root", Path(tmp_path) / "file2dim1.root"], + os.path.join(tmp_path, "place2.root"), + [ + os.path.join(tmp_path, "file1dim1.root"), + os.path.join(tmp_path, "file2dim1.root"), + ], force=True, ) - with uproot.open(Path(tmp_path) / "place2.root") as file: + with uproot.open(os.path.join(tmp_path, "place2.root")) as file: assert file["name"].member("fN") == h1.member("fN") assert file["name"].member("fTsumw") == h1.member("fTsumw") + h2.member( "fTsumw" @@ -285,7 +291,7 @@ def mult_2D_hists(tmp_path): for j in range(len(data1[0])): h1.Fill(i, j, data1[i][j]) - outHistFile = ROOT.TFile.Open(Path(tmp_path) / "file3dim2.root", "RECREATE") + outHistFile = ROOT.TFile.Open(os.path.join(tmp_path, "file3dim2.root"), "RECREATE") outHistFile.cd() h1.Write() outHistFile.Close() @@ -307,7 +313,7 @@ def mult_2D_hists(tmp_path): for j in range(len(data2[0])): h2.Fill(i, j, data2[i][j]) - outHistFile = ROOT.TFile.Open(Path(tmp_path) / "file3dim2.root", "UPDATE") + outHistFile = ROOT.TFile.Open(os.path.join(tmp_path, "file3dim2.root"), "UPDATE") outHistFile.cd() h2.Write() outHistFile.Close() @@ -328,7 +334,7 @@ def mult_2D_hists(tmp_path): for j in range(len(data3[0])): h3.Fill(i, j, data3[i][j]) - outHistFile = ROOT.TFile.Open(Path(tmp_path) / "file4dim2.root", "RECREATE") + outHistFile = ROOT.TFile.Open(os.path.join(tmp_path, "file4dim2.root"), "RECREATE") outHistFile.cd() h3.Write() outHistFile.Close() @@ -350,19 +356,22 @@ def mult_2D_hists(tmp_path): for j in range(len(data4[0])): h4.Fill(i, j, data4[i][j]) - outHistFile = ROOT.TFile.Open(Path(tmp_path) / "file4dim2.root", "UPDATE") + outHistFile = ROOT.TFile.Open(os.path.join(tmp_path, "file4dim2.root"), "UPDATE") outHistFile.cd() h4.Write() outHistFile.Close() h4 = uproot.from_pyroot(h4) hepconvert.add_histograms( - Path(tmp_path) / "place2.root", - [Path(tmp_path) / "file3dim2.root", Path(tmp_path) / "file4dim2.root"], + os.path.join(tmp_path, "place2.root"), + [ + os.path.join(tmp_path, "file3dim2.root"), + os.path.join(tmp_path, "file4dim2.root"), + ], force=True, ) - with uproot.open(Path(tmp_path) / "place2.root") as file: + with uproot.open(os.path.join(tmp_path, "place2.root")) as file: assert file["name"].member("fN") == h1.member("fN") assert file["name"].member("fTsumw") == h1.member("fTsumw") + h3.member( "fTsumw" @@ -447,8 +456,11 @@ def simple_2dim_F(tmp_path): h2 = uproot.from_pyroot(h2) hepconvert.add_histograms( - Path(tmp_path) / "tests/place2.root", - [Path(tmp_path) / "/file1dim2.root", Path(tmp_path) / "/file2dim2.root"], + os.path.join(tmp_path, "place2.root"), + [ + os.path.join(tmp_path, "file1dim2.root"), + os.path.join(tmp_path, "file2dim2.root"), + ], force=True, ) @@ -495,7 +507,7 @@ def simple_2D(tmp_path): for i in range(len(data2)): for j in range(len(data2[0])): h2.Fill(i, j, data2[i][j]) - outHistFile = ROOT.TFile.Open(Path(tmp_path) / "file2dim2.root", "UPDATE") + outHistFile = ROOT.TFile.Open(os.path.join(tmp_path, "file2dim2.root"), "RECREATE") outHistFile.cd() h2.Write() outHistFile.Close() @@ -516,19 +528,22 @@ def simple_2D(tmp_path): for j in range(len(data1[0])): h1.Fill(i, j, data1[i][j]) - outHistFile = ROOT.TFile.Open(Path(tmp_path) / "/file1dim2.root", "RECREATE") + outHistFile = ROOT.TFile.Open(os.path.join(tmp_path, "file1dim2.root"), "RECREATE") outHistFile.cd() h1.Write() outHistFile.Close() h1 = uproot.from_pyroot(h1) hepconvert.add_histograms( - Path(tmp_path) / "place2.root", - [Path(tmp_path) / "/file1dim2.root", Path(tmp_path) / "/file2dim2.root"], + os.path.join(tmp_path, "place2.root"), + [ + os.path.join(tmp_path, "file1dim2.root"), + os.path.join(tmp_path, "file2dim2.root"), + ], force=True, ) - with uproot.open(Path(tmp_path) / "place2.root") as file: + with uproot.open(os.path.join(tmp_path, "place2.root")) as file: assert file["name"].member("fN") == h1.member("fN") assert file["name"].member("fTsumw") == h1.member("fTsumw") + h2.member( "fTsumw" @@ -561,7 +576,9 @@ def break_bins(tmp_path): for i in range(len(data1)): h1.Fill(i, data1[i]) - outHistFile = ROOT.TFile.Open(Path(tmp_path) / "/file1dim1break.root", "RECREATE") + outHistFile = ROOT.TFile.Open( + os.path.join(tmp_path, "file1dim1break.root"), "RECREATE" + ) outHistFile.cd() h1.Write() outHistFile.Close() @@ -573,27 +590,19 @@ def break_bins(tmp_path): for i in range(len(data2)): h2.Fill(i, data2[i]) - outHistFile = ROOT.TFile.Open(Path(tmp_path) / "/file2dim1break.root", "RECREATE") + outHistFile = ROOT.TFile.Open( + os.path.join(tmp_path, "file2dim1break.root"), "RECREATE" + ) outHistFile.cd() h2.Write() outHistFile.Close() h2 = uproot.from_pyroot(h2) hepconvert.add_histograms( - Path(tmp_path) / "/place2break.root", + os.path.join(tmp_path, "place2break.root"), [ - Path(tmp_path) / "/file1dim1break.root", - Path(tmp_path) / "/file2dim1break.root", + os.path.join(tmp_path, "file1dim1break.root"), + os.path.join(tmp_path, "file2dim1break.root"), ], force=True, ) - - -test_simple( - "/Users/zobil/Desktop/directory", - [ - "/Users/zobil/Desktop/directory/hist1.root", - "/Users/zobil/Desktop/directory/hist2.root", - "/Users/zobil/Desktop/directory/hist3.root", - ], -) diff --git a/tests/test_parquet_to_root.py b/tests/test_parquet_to_root.py index 7984e2a..ea29180 100644 --- a/tests/test_parquet_to_root.py +++ b/tests/test_parquet_to_root.py @@ -1,5 +1,7 @@ from __future__ import annotations +from pathlib import Path + import awkward as ak import pytest import uproot @@ -9,16 +11,18 @@ skhep_testdata = pytest.importorskip("skhep_testdata") -def test_hepdata(): +def test_hepdata(tmp_path): arrays = uproot.open(skhep_testdata.data_path("uproot-hepdata-example.root"))[ "ntuple;1" ].arrays() - ak.to_parquet(arrays, "uproot-hepdata-example.parquet") + ak.to_parquet(arrays, Path(tmp_path) / "uproot-hepdata-example.parquet") parquet_to_root( - "uproot-hepdata-example.root", "uproot-hepdata-example.parquet", name="ntuple" + Path(tmp_path) / "uproot-hepdata-example.root", + Path(tmp_path) / "uproot-hepdata-example.parquet", + name="ntuple", ) - test = uproot.open("uproot-hepdata-example.root") + test = uproot.open(Path(tmp_path) / "uproot-hepdata-example.root") original = uproot.open(skhep_testdata.data_path("uproot-hepdata-example.root")) for key in original["ntuple"].keys(): @@ -29,7 +33,7 @@ def test_hepdata(): assert ak.all(test["ntuple"].arrays()[key] == original["ntuple"].arrays()[key]) -def test_hzz(): +def test_hzz(tmp_path): file = uproot.open(skhep_testdata.data_path("uproot-HZZ.root")) tree = file["events"] @@ -81,13 +85,13 @@ def test_hzz(): record = ak.Record(chunks) ak.to_parquet(record, "uproot-HZZ.parquet") parquet_to_root( - "tests/samples/parquet_HZZ.root", + Path(tmp_path) / "parquet_HZZ.root", "uproot-HZZ.parquet", name="events", progress_bar=True, counter_name=lambda counted: "N" + counted, ) - test = uproot.open("tests/samples/parquet_HZZ.root") + test = uproot.open(Path(tmp_path) / "parquet_HZZ.root") original = uproot.open(skhep_testdata.data_path("uproot-HZZ.root")) for key in original["events"].keys():