diff --git a/dev-environment.yml b/dev-environment.yml
index 01c89b7b..eea36bce 100644
--- a/dev-environment.yml
+++ b/dev-environment.yml
@@ -19,5 +19,6 @@ dependencies:
   - uncertainties >=3
   - mplhep>=0.1.27
   - histoprint>=2.2.0
+  - rich
   - pip:
     - -e .
diff --git a/docs/examples/HistNumbaFill.ipynb b/docs/examples/HistNumbaFill.ipynb
new file mode 100644
index 00000000..08e5750c
--- /dev/null
+++ b/docs/examples/HistNumbaFill.ipynb
@@ -0,0 +1,319 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Hist Design Prototype"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Set up the input array and the histogram to fill:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import numpy as np\n",
+    "import numba as nb\n",
+    "from hist import Hist\n",
+    "from hist import axis\n",
+    "\n",
+    "array = np.random.randn(10000)\n",
+    "h = Hist.new.Reg(100, -3, 3, name=\"x\", label=\"x-axis\").Double()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Numba: Hist"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "To extend Numba, we first need to create a Numba type `HistType` for `Hist`, and then teach Numba about our type inference additions:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from numba import types\n",
+    "import numba as nb\n",
+    "\n",
+    "# create Numba type\n",
+    "class HistType(types.Type):\n",
+    "    arraytype = nb.types.Array(nb.types.float64, 1, \"C\")\n",
+    "\n",
+    "    def __init__(self):\n",
+    "        super().__init__(name=\"Hist\")\n",
+    "\n",
+    "\n",
+    "hist_type = HistType()\n",
+    "\n",
+    "# infer values\n",
+    "@nb.extending.typeof_impl.register(Hist)\n",
+    "def typeof_index(val, c):\n",
+    "    return hist_type\n",
+    "\n",
+    "\n",
+    "# infer annotations\n",
+    "nb.extending.as_numba_type.register(Hist, hist_type)\n",
+    "\n",
+    "# infer operations\n",
+    "@nb.extending.type_callable(Hist)\n",
+    "def type_hist(context):\n",
+    "    def typer(axes):\n",
+    "        for ax in axes:\n",
+    "            # TODO: Assumed all are Regular axes\n",
+    "            if not isinstance(ax, axis.Regular):\n",
+    "                return None\n",
+    "        return hist_type\n",
+    "\n",
+    "    return typer"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "We also need to teach Numba how to actually generate the native representation for the new type:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import numba as nb\n",
+    "from numba.core import cgutils\n",
+    "from numba.extending import (\n",
+    "    models,\n",
+    "    overload_attribute,\n",
+    "    lower_builtin,\n",
+    "    NativeValue,\n",
+    ")\n",
+    "\n",
+    "# define data model\n",
+    "@nb.extending.register_model(HistType)\n",
+    "class HistModel(models.StructModel):\n",
+    "    def __init__(self, dmm, fe_type):\n",
+    "        members = [\n",
+    "            (\"bins\", types.int64),\n",
+    "            (\"lo\", types.float64),\n",
+    "            (\"hi\", types.float64),\n",
+    "            (\"data\", fe_type.arraytype),\n",
+    "        ]\n",
+    "        super().__init__(dmm, fe_type, members)\n",
+    "\n",
+    "\n",
+    "# expose attributes, properties and constructors\n",
+    "nb.extending.make_attribute_wrapper(HistType, \"bins\", \"bins\")\n",
+    "nb.extending.make_attribute_wrapper(HistType, \"lo\", \"lo\")\n",
+    "nb.extending.make_attribute_wrapper(HistType, \"hi\", \"hi\")\n",
+    "nb.extending.make_attribute_wrapper(HistType, \"data\", \"data\")\n",
+    "\n",
+    "\n",
+    "@nb.extending.lower_builtin(Hist, types.Integer, types.Float, types.Float, types.Array)\n",
+    "def impl_h(context, builder, sig, args):\n",
+    "    typ = sig.return_type\n",
+    "    # args arrive in the order of the registered signature above\n",
+    "    bins, lo, hi, data = args\n",
+    "    h = cgutils.create_struct_proxy(typ)(context, builder)\n",
+    "    h.lo = lo\n",
+    "    h.hi = hi\n",
+    "    h.bins = bins\n",
+    "    h.data = data\n",
+    "    return h._getvalue()\n",
+    "\n",
+    "\n",
+    "# unbox and box\n",
+    "@nb.extending.unbox(HistType)\n",
+    "def unbox_h(typ, obj, c):\n",
+    "    # lower = h.axes[0][0][0]\n",
+    "    # upper = h.axes[0][-1][-1]\n",
+    "    # bins = len(h.axes[0])\n",
+    "    # data = h.values()\n",
+    "\n",
+    "    start_obj = c.pyapi.long_from_long(c.context.get_constant(nb.long_, 0))\n",
+    "    stop_obj = c.pyapi.long_from_long(c.context.get_constant(nb.long_, -1))\n",
+    "\n",
+    "    data_obj = c.pyapi.call_method(obj, \"values\")\n",
+    "\n",
+    "    axis_tuple_obj = c.pyapi.object_getattr_string(obj, \"axes\")\n",
+    "    axis_obj = c.pyapi.tuple_getitem(axis_tuple_obj, 0)\n",
+    "    bins_obj = c.pyapi.call_method(axis_obj, \"__len__\")\n",
+    "\n",
+    "    lo1_obj = c.pyapi.object_getitem(axis_obj, start_obj)\n",
+    "    hi1_obj = c.pyapi.object_getitem(axis_obj, stop_obj)\n",
+    "\n",
+    "    lo_obj = c.pyapi.object_getitem(lo1_obj, start_obj)\n",
+    "    hi_obj = c.pyapi.object_getitem(hi1_obj, stop_obj)\n",
+    "\n",
+    "    h = cgutils.create_struct_proxy(typ)(c.context, c.builder)\n",
+    "\n",
+    "    h.bins = c.pyapi.number_as_ssize_t(bins_obj)\n",
+    "    h.lo = c.pyapi.float_as_double(lo_obj)\n",
+    "    h.hi = c.pyapi.float_as_double(hi_obj)\n",
+    "    h.data = c.pyapi.to_native_value(typ.arraytype, data_obj).value\n",
+    "\n",
+    "    c.pyapi.decref(bins_obj)\n",
+    "    c.pyapi.decref(lo_obj)\n",
+    "    c.pyapi.decref(hi_obj)\n",
+    "    c.pyapi.decref(data_obj)\n",
+    "\n",
+    "    c.pyapi.decref(lo1_obj)\n",
+    "    c.pyapi.decref(hi1_obj)\n",
+    "\n",
+    "    c.pyapi.decref(axis_tuple_obj)\n",
+    "    # axis_obj came from tuple_getitem (borrowed reference): no decref, it crashes\n",
+    "\n",
+    "    c.pyapi.decref(start_obj)\n",
+    "    c.pyapi.decref(stop_obj)\n",
+    "\n",
+    "    is_error = cgutils.is_not_null(c.builder, c.pyapi.err_occurred())\n",
+    "    return NativeValue(h._getvalue(), is_error=is_error)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "We also need to teach Numba how to run `fill`:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "@nb.extending.overload_method(HistType, \"fill\")\n",
+    "def fill_resolve(hist, val):\n",
+    "    if not isinstance(hist, HistType):\n",
+    "        return None\n",
+    "    if not isinstance(val, nb.types.Float):\n",
+    "        return None\n",
+    "\n",
+    "    def fill(hist, val):\n",
+    "        delta = 1 / ((hist.hi - hist.lo) / hist.bins)\n",
+    "        i = int((val - hist.lo) * delta)\n",
+    "\n",
+    "        # int() truncates toward zero, so values just below lo would map to bin 0\n",
+    "        if val >= hist.lo and i < hist.bins:\n",
+    "            hist.data[i] += 1\n",
+    "\n",
+    "    return fill"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Timing the Python version:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "%%timeit\n",
+    "h_python = h.copy()\n",
+    "h_python.fill(array)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "@nb.njit\n",
+    "def nb_fill_hist(h, values):\n",
+    "    for v in values:\n",
+    "        h.fill(v)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Timing the Numba version:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "%%timeit\n",
+    "h_numba = h.copy()\n",
+    "nb_fill_hist(h_numba, array)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Showing the results:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "h_numba = h.copy()\n",
+    "nb_fill_hist(h_numba, array)\n",
+    "h_numba"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "h_python = h.copy()\n",
+    "h_python.fill(array)"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "hist",
+   "language": "python",
+   "name": "hist"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.9.7"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}
diff --git a/src/hist/basehist.py b/src/hist/basehist.py
index 40725c65..93d0129f 100644
--- a/src/hist/basehist.py
+++ b/src/hist/basehist.py
@@ -500,6 +500,37 @@ def plot_pie(
         import hist.plot
 
         return hist.plot.plot_pie(self, ax=ax, **kwargs)
+
+    def integrate(
+        self,
+        name: int | str,
+        i_or_list: Loc | list[str | int] | None = None,
+        j: Loc | None = None,
+    ) -> Self:
+        """
+        Sum (integrate) the histogram over one axis by indexing with ``sum``.
+
+        Parameters
+        ----------
+        name : int | str
+            Index or name of the axis to integrate over.
+        i_or_list : Loc | list[str | int] | None
+            Either the start of the range to sum over, or a list of bins to
+            pick and then sum. ``None`` leaves the start of the range open.
+        j : Loc | None
+            End of the range to sum over; ignored when ``i_or_list`` is a list.
+
+        Returns
+        -------
+        The result of indexing ``self`` with a ``sum`` slice on ``name``.
+        """
+        if isinstance(i_or_list, list):
+            # Pick the requested bins first, then sum exactly the picked bins
+            # (positions 0 .. len(i_or_list)) of the reduced axis.
+            picked = self[{name: i_or_list}]
+            return picked[{name: slice(0, len(i_or_list), sum)}]
+
+        return self[{name: slice(i_or_list, j, sum)}]
 
     def stack(self, axis: int | str) -> hist.stack.Stack:
         """
diff --git a/tests/test_general.py b/tests/test_general.py
index ae351224..226da3b3 100644
--- a/tests/test_general.py
+++ b/tests/test_general.py
@@ -925,3 +925,18 @@ def test_quick_construct_direct():
     assert tuple(h.sort(0, key=lambda x: -x).axes[0]) == (4, 2, 1)
     assert tuple(h.sort(1).axes[1]) == ("AB", "BC", "BCC")
     assert tuple(h.sort(1, reverse=True).axes[1]) == ("BCC", "BC", "AB")
+
+
+def test_integrate():
+    h = (
+        hist.new.IntCat([4, 1, 2], name="x")
+        .StrCat(["AB", "BCC", "BC"], name="y")
+        .Double()
+    )
+    h.fill(4, "AB", weight=1)
+    h.fill(4, "BCC", weight=2)
+    h.fill(4, "BC", weight=4)
+    h.fill(4, "X", weight=8)  # "X" is not one of the listed categories
+
+    h1 = h.integrate("y", ["AB", "BC"])
+    assert h1[{"x": hist.loc(4)}] == 5