Skip to content
This repository has been archived by the owner on Jun 21, 2022. It is now read-only.

Commit

Permalink
Merge pull request #45 from scikit-hep/issues-36-44
Browse files: browse the repository at this point in the history
Issues 36 44
  • Loading branch information
jpivarski authored Nov 28, 2018
2 parents ee891fa + 778111c commit 85eb239
Show file tree
Hide file tree
Showing 5 changed files with 57 additions and 35 deletions.
25 changes: 18 additions & 7 deletions awkward/array/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,14 +34,31 @@
import awkward.type
import awkward.util

class At(object):
    """
    Accessor that exposes the items of a wrapped array as attributes or as
    call arguments.

    ``accessor.name`` returns ``array["name"]`` and ``accessor(key)`` returns
    ``array[str(key)]``, so ``accessor(0)`` looks up the item named ``"0"``.
    """

    def __init__(self, array):
        # the wrapped object; it only needs to support __getitem__ and repr
        self._array = array

    def __repr__(self):
        # drop the first character of the wrapped repr (its opening "<" when
        # that repr follows the "<...>" convention) so brackets are not nested
        return "<at accessor for {0}".format(repr(self._array)[1:])

    def __getattr__(self, where):
        # __getattr__ only runs after normal attribute lookup has failed.
        # "_array" is missing only on an instance whose state has not been set
        # yet (copy/pickle create one before restoring state); reading
        # self._array here would then recurse without end.  Dunder names are
        # protocol probes (__deepcopy__, __setstate__, ...) and must fail with
        # AttributeError so that hasattr/getattr-with-default keep working.
        # An item that really has a dunder name is still reachable through
        # the call form: accessor("__name__").
        if where == "_array" or (len(where) > 4 and where.startswith("__") and where.endswith("__")):
            raise AttributeError("'{0}' object has no attribute '{1}'".format(self.__class__.__name__, where))
        return self._array[where]

    def __call__(self, where):
        # str() lets integer-like names be passed as integers
        return self._array[str(where)]

class AwkwardArray(awkward.util.NDArrayOperatorsMixin):
"""
AwkwardArray: abstract base class
"""

@property
def at(self):
    """Return a new ``At`` accessor wrapping this array."""
    accessor = At(self)
    return accessor

def __array__(self, dtype=None):
# hitting this function is usually undesirable; uncomment to search for performance bugs
# raise Exception("{0} {1}".format(args, kwargs))
# raise Exception

if dtype is None:
dtype = self.dtype
Expand Down Expand Up @@ -92,12 +109,6 @@ def _try_tolist(self, x):
except AttributeError:
return x

def __getattr__(self, where):
    """
    Fallback attribute lookup: when ``awkward.util.is_intstring(where)`` is
    true, return ``self[where[1:]]`` (the name without its first character);
    otherwise raise ``AttributeError``.
    """
    if not awkward.util.is_intstring(where):
        message = "'{0}' object has no attribute '{1}'".format(self.__class__.__name__, where)
        raise AttributeError(message)

    # strip the leading character and use the remainder as the item key
    key = where[1:]
    return self[key]

def __bool__(self):
    """Always raise ``ValueError``: truth-testing the array is rejected as ambiguous."""
    message = "The truth value of an array with more than one element is ambiguous. Use a.any() or a.all()"
    raise ValueError(message)

Expand Down
41 changes: 31 additions & 10 deletions awkward/array/jagged.py
Original file line number Diff line number Diff line change
Expand Up @@ -572,13 +572,26 @@ def __getitem__(self, where):

newcounts = awkward.util.numpy.count_nonzero(mask, axis=1)
newoffsets = counts2offsets(newcounts.reshape(-1))
newcontent = node.content[(indexes + node._starts.reshape((len(node), 1)))[mask]]
newcontent = node._content[(indexes + node._starts.reshape((len(node), 1)))[mask]]

node = node.copy(starts=newoffsets[:-1], stops=newoffsets[1:], content=newcontent)

else:
# the other cases are possible, but complicated; the first sets the form
raise NotImplementedError("jagged second dimension index type: {0}".format(original_head))
head = awkward.util.numpy.array(head, copy=False)
if len(head.shape) == 1 and issubclass(head.dtype.type, awkward.util.numpy.integer):
index = awkward.util.numpy.tile(head, len(node))
pluscounts = (index.reshape(-1, len(head)) + node.counts.reshape(-1, 1)).reshape(-1)
index[index < 0] = pluscounts[index < 0]
if (index < 0).any() or (index.reshape(-1, len(head)) >= node.counts.reshape(-1, 1)).any():
raise IndexError("index in jagged subdimension is out of bounds")
index = (index.reshape(-1, len(head)) + self._starts.reshape(-1, 1)).reshape(-1)
node = node._content[index].reshape(-1, len(head))

elif len(head.shape) == 1 and issubclass(head.dtype.type, (awkward.util.numpy.bool, awkward.util.numpy.bool_)):
node = node.regular()[:, head]

else:
raise TypeError("cannot interpret shape {0}, dtype {1} as a fancy index or mask".format(head.shape, head.dtype))

return node[tail]

Expand Down Expand Up @@ -811,7 +824,7 @@ def argdistincts(self):
def distincts(self):
return self.pairs(same=False)

def argpairs(self, same=True):
def _argpairs(self, same=True):
import awkward.array.table
self._valid()

Expand Down Expand Up @@ -839,16 +852,19 @@ def argpairs(self, same=True):

return out

def argpairs(self, same=True):
    """
    Return the result of ``self._argpairs(same=same)`` with ``self._starts``
    subtracted from it.
    """
    # presumably this turns positions in the content into positions relative
    # to each subarray's start -- confirm against _argpairs
    absolute = self._argpairs(same=same)
    return absolute - self._starts

def pairs(self, same=True):
    """
    Build a JaggedArray of two-column tables pairing elements of this array.

    The indexes come from ``self._argpairs(same=same)``; columns ``"0"`` and
    ``"1"`` of its content select the left and right elements from
    ``self._content``.  The result reuses the offsets and ``_parents`` of the
    index array.
    """
    # local import, as in cross(), so the table module is guaranteed loaded
    import awkward.array.table

    # Use the private _argpairs, not the public argpairs: the public one
    # subtracts self._starts from the result, and these indexes are applied to
    # self._content directly.  (A redundant call to self.argpairs whose result
    # was immediately overwritten has been removed.)
    argpairs = self._argpairs(same=same)
    left = argpairs._content["0"]
    right = argpairs._content["1"]

    out = JaggedArray.fromoffsets(argpairs.offsets, awkward.array.table.Table(self._content[left], self._content[right]))
    out._parents = argpairs._parents
    return out

def argcross(self, other):
def _argcross(self, other):
import awkward.array.table
self._valid()

Expand All @@ -873,10 +889,13 @@ def argcross(self, other):
out._parents = parents
return out

def argcross(self, other):
    """
    Return the result of ``self._argcross(other)`` with ``self._starts``
    subtracted from it.
    """
    absolute = self._argcross(other)
    return absolute - self._starts

def cross(self, other):
import awkward.array.table

argcross = self.argcross(other)
argcross = self._argcross(other)
left, right = argcross._content._content.values()

fields = [other._content[right]]
Expand All @@ -895,7 +914,9 @@ def _canuseoffset(self):
return offsetsaliased(self._starts, self._stops) or (len(self._starts.shape) == 1 and awkward.util.numpy.array_equal(self._starts[1:], self._stops[:-1]))

def flatten(self):
if self._canuseoffset():
if len(self) == 0:
return self._content[0:0]
elif self._canuseoffset():
return self._content[self._starts[0]:self._stops[-1]]
else:
offsets = counts2offsets(self.counts.reshape(-1))
Expand All @@ -914,7 +935,7 @@ def any(self):
else:
content = self._content != 0

out = awkward.util.numpy.empty(self._starts.shape + content.shape[1:], dtype=content.dtype)
out = awkward.util.numpy.empty(self._starts.shape + content.shape[1:], dtype=awkward.BOOLTYPE)
nonterminal = self.offsets[self.offsets != self.offsets[-1]]
if os.name == "nt": # Windows Numpy reduceat requires 32-bit indexes
nonterminal = nonterminal.astype(awkward.util.numpy.int32)
Expand All @@ -935,7 +956,7 @@ def all(self):
else:
content = self._content != 0

out = awkward.util.numpy.empty(self._starts.shape + content.shape[1:], dtype=content.dtype)
out = awkward.util.numpy.empty(self._starts.shape + content.shape[1:], dtype=awkward.BOOLTYPE)
nonterminal = self.offsets[self.offsets != self.offsets[-1]]
if os.name == "nt": # Windows Numpy reduceat requires 32-bit indexes
nonterminal = nonterminal.astype(awkward.util.numpy.int32)
Expand Down
18 changes: 6 additions & 12 deletions awkward/array/table.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,22 +55,16 @@ def __init__(self, table, index):
def __repr__(self):
    """Render as ``<rowname index>``, using the table's ``rowname`` and this row's index."""
    rowname = self._table.rowname
    return "<{0} {1}>".format(rowname, self._index)

def __hasattr__(self, name):
    """
    Report whether ``name``, or ``name`` prefixed with an underscore, is a key
    of the table's content.
    """
    # NOTE: Python defines no __hasattr__ hook, so the built-in hasattr()
    # never calls this; it only runs when invoked explicitly.
    columns = self._table._content
    if name in columns:
        return True
    return "_" + name in columns
@property
def at(self):
    """Return a new ``awkward.array.base.At`` accessor wrapping this object."""
    accessor = awkward.array.base.At(self)
    return accessor

def __contains__(self, name):
    """Support ``name in row``: true when ``name`` is a key of the table's content."""
    columns = self._table._content
    return name in columns

def tolist(self):
    """
    Return a dict mapping each column name to that column's value at this
    row's index, passed through the table's ``_try_tolist``.
    """
    table = self._table
    return {name: table._try_tolist(column[self._index]) for name, column in table._content.items()}

def __getattr__(self, name):
    """
    Fallback attribute lookup: ``row.name`` returns the value at this row's
    index from the column stored under ``"_" + name``.

    Raises ``AttributeError`` when the table has no such column.
    """
    column = self._table._content.get("_" + name)
    if column is None:
        raise AttributeError("{0} is not a column in this {1}".format(repr(name), self._table.rowname))
    return column[self._index]

def __getitem__(self, where):
if isinstance(where, awkward.util.string):
try:
Expand Down Expand Up @@ -344,23 +338,23 @@ def _newslice(self, head):
if head < 0:
head += length
if not 0 <= head < length:
IndexError("index {0} out of bounds for length {1}".format(original_head, length))
raise IndexError("index {0} out of bounds for length {1}".format(original_head, length))
return head

elif isinstance(self._view, tuple):
mystart, mystep, mylength = self._view
if head < 0:
head += mylength
if not 0 <= head < mylength:
IndexError("index {0} out of bounds for length {1}".format(original_head, mylength))
raise IndexError("index {0} out of bounds for length {1}".format(original_head, mylength))
return mystart + mystep*head

else:
length = len(self._view)
if head < 0:
head += length
if not 0 <= head < length:
IndexError("index {0} out of bounds for length {1}".format(original_head, length))
raise IndexError("index {0} out of bounds for length {1}".format(original_head, length))
return self._view[head]

elif isinstance(head, slice):
Expand Down
2 changes: 1 addition & 1 deletion awkward/version.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@

import re

# Package version; the earlier "0.4.5" assignment was immediately overwritten
# and has been dropped.
__version__ = "0.5.0"
version = __version__
# Version components as strings, split on "." and "-": ("0", "5", "0").
version_info = tuple(re.split(r"[-\.]", __version__))

Expand Down
6 changes: 1 addition & 5 deletions tests/test_methods.py
Original file line number Diff line number Diff line change
Expand Up @@ -116,17 +116,14 @@ def __add__(self, other):
def __radd__(self, other):
return self._type_op(operator.add, other, True)


class TypeArray(TypeArrayMethods, awkward.ObjectArray):
    """Test fixture: an ObjectArray with the TypeArrayMethods mixin and a single "x" column."""

    def __init__(self, x):
        # start from an empty Table, then attach the data as column "x"
        table = awkward.Table()
        self._initObjectArray(table)
        self["x"] = x


class Type(TypeMethods):
    """Test fixture: a TypeMethods object that stores its value in ``_x``."""

    def __init__(self, x):
        # keep the value under the private attribute name
        self._x = x


counts = np.array([1, 4, 2, 0, 15])
x = np.arange(np.sum(counts))
Expand All @@ -150,5 +147,4 @@ def __init__(self, x):
JaggedTypeArray = awkward.Methods.mixin(TypeArrayMethods, awkward.JaggedArray)
jagged_array = JaggedTypeArray.fromcounts(counts, array)
assert np.all(jagged_array.x.flatten() == x)
assert np.all(jagged_array.pairs()._0.x.counts == counts*(counts+1)//2)

assert np.all(jagged_array.pairs().at(0).x.counts == counts*(counts+1)//2)

0 comments on commit 85eb239

Please sign in to comment.