From 20adc4f60815534971da5fec1d004507a79585c7 Mon Sep 17 00:00:00 2001 From: Jim Pivarski Date: Wed, 28 Nov 2018 10:47:55 -0600 Subject: [PATCH 1/9] fixed #44 --- awkward/array/jagged.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/awkward/array/jagged.py b/awkward/array/jagged.py index b5097bb6..32657a5c 100644 --- a/awkward/array/jagged.py +++ b/awkward/array/jagged.py @@ -895,7 +895,9 @@ def _canuseoffset(self): return offsetsaliased(self._starts, self._stops) or (len(self._starts.shape) == 1 and awkward.util.numpy.array_equal(self._starts[1:], self._stops[:-1])) def flatten(self): - if self._canuseoffset(): + if len(self) == 0: + return self._content[0:0] + elif self._canuseoffset(): return self._content[self._starts[0]:self._stops[-1]] else: offsets = counts2offsets(self.counts.reshape(-1)) From e02db48a9d01d54bae58febb10ab064888425f96 Mon Sep 17 00:00:00 2001 From: Jim Pivarski Date: Wed, 28 Nov 2018 10:58:38 -0600 Subject: [PATCH 2/9] fixed #36 --- awkward/array/jagged.py | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/awkward/array/jagged.py b/awkward/array/jagged.py index 32657a5c..1b6ce19c 100644 --- a/awkward/array/jagged.py +++ b/awkward/array/jagged.py @@ -811,7 +811,7 @@ def argdistincts(self): def distincts(self): return self.pairs(same=False) - def argpairs(self, same=True): + def _argpairs(self, same=True): import awkward.array.table self._valid() @@ -839,8 +839,11 @@ def argpairs(self, same=True): return out + def argpairs(self, same=True): + return self._argpairs(same=same) - self._starts + def pairs(self, same=True): - argpairs = self.argpairs(same=same) + argpairs = self._argpairs(same=same) left = argpairs._content["0"] right = argpairs._content["1"] @@ -848,7 +851,7 @@ def pairs(self, same=True): out._parents = argpairs._parents return out - def argcross(self, other): + def _argcross(self, other): import awkward.array.table self._valid() @@ -873,10 +876,13 @@ def argcross(self, other): out._parents = parents return out + def argcross(self, other): + return self._argcross(other) - self._starts + def cross(self, other): import awkward.array.table - argcross = self.argcross(other) + argcross = self._argcross(other) left, right = argcross._content._content.values() fields = [other._content[right]] From 5f9ae4b34ac1b8d1513c83a1635882788ab4f71c Mon Sep 17 00:00:00 2001 From: Jim Pivarski Date: Wed, 28 Nov 2018 11:23:58 -0600 Subject: [PATCH 3/9] implemented at-accessor --- awkward/array/base.py | 23 +++++++++++++++++------ 1 file changed, 17 insertions(+), 6 deletions(-) diff --git a/awkward/array/base.py b/awkward/array/base.py index f609f56f..34104395 100644 --- a/awkward/array/base.py +++ b/awkward/array/base.py @@ -34,11 +34,28 @@ import awkward.type import awkward.util +class At(object): + def __init__(self, array): + self._array = array + + def __repr__(self): + return " Date: Wed, 28 Nov 2018 11:29:59 -0600 Subject: [PATCH 4/9] the at-accessor will require a mid-version number increase because uproot will need to be coordinated --- awkward/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/awkward/version.py b/awkward/version.py index a68d668b..4fa43f41 100644 --- a/awkward/version.py +++ b/awkward/version.py @@ -30,7 +30,7 @@ import re -__version__ = "0.4.5" +__version__ = "0.5.0" version = __version__ version_info = tuple(re.split(r"[-\.]", __version__)) From 23731e5213ba655093bf96a43a38da57e6cdee0a Mon Sep 17 00:00:00 2001 From: Jim Pivarski Date: Wed, 28 Nov 2018 11:45:33 -0600 Subject: [PATCH 5/9] fix test --- tests/test_methods.py | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/tests/test_methods.py b/tests/test_methods.py index b0c6cb34..12ae49e6 100644 --- a/tests/test_methods.py +++ b/tests/test_methods.py @@ -116,17 +116,14 @@ def __add__(self, other): def __radd__(self, other): return self._type_op(operator.add, other, True) - class TypeArray(TypeArrayMethods, awkward.ObjectArray): def __init__(self, x): self._initObjectArray(awkward.Table()) self["x"] = x - class Type(TypeMethods): def __init__(self, x): self._x = x - counts = np.array([1, 4, 2, 0, 15]) x = np.arange(np.sum(counts)) @@ -150,5 +147,4 @@ def __init__(self, x): JaggedTypeArray = awkward.Methods.mixin(TypeArrayMethods, awkward.JaggedArray) jagged_array = JaggedTypeArray.fromcounts(counts, array) assert np.all(jagged_array.x.flatten() == x) - assert np.all(jagged_array.pairs()._0.x.counts == counts*(counts+1)//2) - + assert np.all(jagged_array.pairs().at(0).x.counts == counts*(counts+1)//2) From 55ca35d658745b5d5aa555826941d97b1e27a72a Mon Sep 17 00:00:00 2001 From: Jim Pivarski Date: Wed, 28 Nov 2018 12:32:07 -0600 Subject: [PATCH 6/9] any() and all() should always return booleans --- awkward/array/base.py | 2 +- awkward/array/jagged.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/awkward/array/base.py b/awkward/array/base.py index 34104395..071dc330 100644 --- a/awkward/array/base.py +++ b/awkward/array/base.py @@ -58,7 +58,7 @@ def at(self): def __array__(self, dtype=None): # hitting this function is usually undesirable; uncomment to search for performance bugs - # raise Exception("{0} {1}".format(args, kwargs)) + # raise Exception if dtype is None: dtype = self.dtype diff --git a/awkward/array/jagged.py b/awkward/array/jagged.py index 1b6ce19c..28b8455f 100644 --- a/awkward/array/jagged.py +++ b/awkward/array/jagged.py @@ -922,7 +922,7 @@ def any(self): else: content = self._content != 0 - out = awkward.util.numpy.empty(self._starts.shape + content.shape[1:], dtype=content.dtype) + out = awkward.util.numpy.empty(self._starts.shape + content.shape[1:], dtype=awkward.BOOLTYPE) nonterminal = self.offsets[self.offsets != self.offsets[-1]] if os.name == "nt": # Windows Numpy reduceat requires 32-bit indexes nonterminal = nonterminal.astype(awkward.util.numpy.int32) @@ -943,7 +943,7 @@ def all(self): else: content = self._content != 0 - out = awkward.util.numpy.empty(self._starts.shape + content.shape[1:], dtype=content.dtype) + out = awkward.util.numpy.empty(self._starts.shape + content.shape[1:], dtype=awkward.BOOLTYPE) nonterminal = self.offsets[self.offsets != self.offsets[-1]] if os.name == "nt": # Windows Numpy reduceat requires 32-bit indexes nonterminal = nonterminal.astype(awkward.util.numpy.int32) From 132e6b0311a994053d28e12c509fc8d0db0dc036 Mon Sep 17 00:00:00 2001 From: Jim Pivarski Date: Wed, 28 Nov 2018 12:52:56 -0600 Subject: [PATCH 7/9] Table.Row should respond to 'at' in the same way --- awkward/array/table.py | 18 ++++++------------ 1 file changed, 6 insertions(+), 12 deletions(-) diff --git a/awkward/array/table.py b/awkward/array/table.py index a353f82a..7c1500ba 100644 --- a/awkward/array/table.py +++ b/awkward/array/table.py @@ -55,8 +55,9 @@ def __init__(self, table, index): def __repr__(self): return "<{0} {1}>".format(self._table.rowname, self._index) - def __hasattr__(self, name): - return name in self._table._content or "_" + name in self._table._content + @property + def at(self): + return awkward.array.base.At(self) def __contains__(self, name): return name in self._table._content @@ -64,13 +65,6 @@ def __contains__(self, name): def tolist(self): return dict((n, self._table._try_tolist(x[self._index])) for n, x in self._table._content.items()) - def __getattr__(self, name): - content = self._table._content.get("_" + name, None) - if content is not None: - return content[self._index] - - raise AttributeError("{0} is not a column in this {1}".format(repr(name), self._table.rowname)) - def __getitem__(self, where): if isinstance(where, awkward.util.string): try: @@ -344,7 +338,7 @@ def _newslice(self, head): if head < 0: head += length if not 0 <= head < length: - IndexError("index {0} out of bounds for length {1}".format(original_head, length)) + raise IndexError("index {0} out of bounds for length {1}".format(original_head, length)) return head elif isinstance(self._view, tuple): @@ -352,7 +346,7 @@ def _newslice(self, head): if head < 0: head += mylength if not 0 <= head < mylength: - IndexError("index {0} out of bounds for length {1}".format(original_head, mylength)) + raise IndexError("index {0} out of bounds for length {1}".format(original_head, mylength)) return mystart + mystep*head else: @@ -360,7 +354,7 @@ def _newslice(self, head): if head < 0: head += length if not 0 <= head < length: - IndexError("index {0} out of bounds for length {1}".format(original_head, length)) + raise IndexError("index {0} out of bounds for length {1}".format(original_head, length)) return self._view[head] elif isinstance(head, slice): From c43eb55b82534b11f8012f9c369ca5e4934f85aa Mon Sep 17 00:00:00 2001 From: Jim Pivarski Date: Wed, 28 Nov 2018 14:38:36 -0600 Subject: [PATCH 8/9] fixes #47 --- awkward/array/jagged.py | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/awkward/array/jagged.py b/awkward/array/jagged.py index 28b8455f..0409da1d 100644 --- a/awkward/array/jagged.py +++ b/awkward/array/jagged.py @@ -572,11 +572,26 @@ def __getitem__(self, where): newcounts = awkward.util.numpy.count_nonzero(mask, axis=1) newoffsets = counts2offsets(newcounts.reshape(-1)) - newcontent = node.content[(indexes + node._starts.reshape((len(node), 1)))[mask]] + newcontent = node._content[(indexes + node._starts.reshape((len(node), 1)))[mask]] node = node.copy(starts=newoffsets[:-1], stops=newoffsets[1:], content=newcontent) else: + head = awkward.util.numpy.array(head, copy=False) + if len(head.shape) == 1 and issubclass(head.dtype.type, awkward.util.numpy.integer): + index = awkward.util.numpy.tile(head, len(node)) + pluscounts = (index.reshape(-1, len(head)) + node.counts.reshape(-1, 1)).reshape(-1) + index[index < 0] = pluscounts[index < 0] + if (index < 0).any() or (index.reshape(-1, len(head)) >= node.counts.reshape(-1, 1)).any(): + raise IndexError("index in jagged subdimension is out of bounds") + index = (index.reshape(-1, len(head)) + self._starts.reshape(-1, 1)).reshape(-1) + return node._content[index].reshape(-1, len(head)) + + elif len(head.shape) == 1 and issubclass(head.dtype.type, (awkward.util.numpy.bool, awkward.util.numpy.bool_)): + # if len(self) != len(head): + # raise IndexError("boolean index did not match indexed array along dimension 0; dimension is {0} but corresponding boolean dimension is {1}".format(len(self), len(head))) + raise NotImplementedError + # the other cases are possible, but complicated; the first sets the form raise NotImplementedError("jagged second dimension index type: {0}".format(original_head)) From 778111cfa54740b3d9ad87c870938fea7bffb0d2 Mon Sep 17 00:00:00 2001 From: Jim Pivarski Date: Wed, 28 Nov 2018 14:43:38 -0600 Subject: [PATCH 9/9] jagged array nested indexing is finally complete --- awkward/array/jagged.py | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/awkward/array/jagged.py b/awkward/array/jagged.py index 0409da1d..2f057c5e 100644 --- a/awkward/array/jagged.py +++ b/awkward/array/jagged.py @@ -585,15 +585,13 @@ def __getitem__(self, where): if (index < 0).any() or (index.reshape(-1, len(head)) >= node.counts.reshape(-1, 1)).any(): raise IndexError("index in jagged subdimension is out of bounds") index = (index.reshape(-1, len(head)) + self._starts.reshape(-1, 1)).reshape(-1) - return node._content[index].reshape(-1, len(head)) + node = node._content[index].reshape(-1, len(head)) elif len(head.shape) == 1 and issubclass(head.dtype.type, (awkward.util.numpy.bool, awkward.util.numpy.bool_)): - # if len(self) != len(head): - # raise IndexError("boolean index did not match indexed array along dimension 0; dimension is {0} but corresponding boolean dimension is {1}".format(len(self), len(head))) - raise NotImplementedError + node = node.regular()[:, head] - # the other cases are possible, but complicated; the first sets the form - raise NotImplementedError("jagged second dimension index type: {0}".format(original_head)) + else: + raise TypeError("cannot interpret shape {0}, dtype {1} as a fancy index or mask".format(head.shape, head.dtype)) return node[tail]