Skip to content
This repository has been archived by the owner on Jun 21, 2022. It is now read-only.

Commit

Permalink
Merge pull request #34 from scikit-hep/issue-31
Browse files Browse the repository at this point in the history
Resolves issue #31
  • Loading branch information
jpivarski authored Nov 19, 2018
2 parents 72070b1 + 1ca9dd3 commit 99a71c0
Show file tree
Hide file tree
Showing 2 changed files with 83 additions and 24 deletions.
75 changes: 51 additions & 24 deletions awkward/array/jagged.py
Original file line number Diff line number Diff line change
Expand Up @@ -193,6 +193,14 @@ def fromjagged(cls, jagged):
jagged = jagged._tojagged(copy=False)
return cls(jagged._starts, jagged._stops, jagged._content)

@classmethod
def fromregular(cls, content, size=1):
    """Build a jagged array by cutting ``content`` into runs of ``size`` items.

    Offsets are placed every ``size`` positions starting at 0. The last
    offset is then overwritten with ``len(content)``, so when
    ``len(content)`` is not a multiple of ``size`` the final element is a
    shorter, partial run instead of pointing past the end of ``content``.

    Parameters
    ----------
    content : object supporting ``len()``
        The flat content to be partitioned; passed through unchanged to
        ``cls.fromoffsets``.
    size : int, default 1
        Number of items per element; must be at least 1.

    Returns
    -------
    The result of ``cls.fromoffsets(offsets, content)``.

    Raises
    ------
    ValueError
        If ``size`` is smaller than 1.
    """
    # A zero size would otherwise fail inside the floor division below with
    # an uninformative ZeroDivisionError, and a negative size would produce
    # an empty offsets array; reject both up front.
    if size < 1:
        raise ValueError("size must be a positive integer, not {0}".format(size))

    # Ceiling division: number of runs needed to cover all of content.
    quotient = -(-len(content) // size)
    offsets = awkward.util.numpy.arange(0, quotient * size + 1, size, dtype=awkward.util.INDEXTYPE)
    if len(offsets) > 0:
        # Clamp the final boundary so a trailing partial run stays in bounds.
        offsets[-1] = len(content)
    return cls.fromoffsets(offsets, content)

def copy(self, starts=None, stops=None, content=None):
out = self.__class__.__new__(self.__class__)
out._starts = self._starts
Expand Down Expand Up @@ -507,46 +515,59 @@ def __getitem__(self, where):
elif head.start >= 0:
starts = awkward.util.numpy.minimum(counts, head.start)
else:
starts = awkward.util.numpy.minimum(counts, counts + head.start)
starts = awkward.util.numpy.maximum(0, awkward.util.numpy.minimum(counts, counts + head.start))

if head.stop is None:
stops = counts
elif head.stop >= 0:
stops = awkward.util.numpy.minimum(counts, head.stop)
else:
stops = awkward.util.numpy.minimum(counts, counts + head.stop)
stops = awkward.util.numpy.maximum(0, awkward.util.numpy.minimum(counts, counts + head.stop))

stops = awkward.util.numpy.maximum(starts, stops)

start = starts.min()
stop = stops.max()
indexes = awkward.util.numpy.empty((len(node), abs(stop - start)), dtype=awkward.util.INDEXTYPE)
indexes[:, :] = awkward.util.numpy.arange(start, stop)

mask = indexes >= starts.reshape((len(node), 1))
awkward.util.numpy.bitwise_and(mask, indexes < stops.reshape((len(node), 1)), out=mask)
if step != 1:
awkward.util.numpy.bitwise_and(mask, awkward.util.numpy.remainder(indexes - starts.reshape((len(node), 1)), step) == 0, out=mask)

else:
if head.start is None:
starts = counts - 1
elif head.start >= 0:
starts = awkward.util.numpy.minimum(counts - 1, head.start)
else:
starts = awkward.util.numpy.minimum(counts - 1, counts + head.start)
starts = awkward.util.numpy.maximum(-1, awkward.util.numpy.minimum(counts - 1, counts + head.start))

if head.stop is None:
stops = awkward.util.numpy.full(counts.shape, -1, dtype=awkward.util.INDEXTYPE)
elif head.stop >= 0:
stops = awkward.util.numpy.minimum(counts - 1, head.stop)
else:
stops = awkward.util.numpy.minimum(counts - 1, counts + head.stop)

if step > 0:
start = starts.min()
stop = stops.max()
newcounts = stops - starts
stops = awkward.util.numpy.maximum(-1, awkward.util.numpy.minimum(counts - 1, counts + head.stop))

quotient, remainder = divmod(stop - start, step)
oversize = quotient + (1 if remainder != 0 else 0)
indexes = awkward.util.numpy.empty((len(node), oversize), dtype=awkward.util.INDEXTYPE)
indexes[:, :] = awkward.util.numpy.arange(start, stop, step)
stops = awkward.util.numpy.minimum(starts, stops)

absindexes = indexes + node._starts.reshape((len(node), 1))
start = starts.max()
stop = stops.min()
indexes = awkward.util.numpy.empty((len(node), abs(stop - start)), dtype=awkward.util.INDEXTYPE)
indexes[:, :] = awkward.util.numpy.arange(start, stop, -1)

goodindexes = absindexes[awkward.util.numpy.bitwise_and(indexes >= starts.reshape((len(node), 1)), indexes < stops.reshape((len(node), 1)))]
mask = indexes <= starts.reshape((len(node), 1))
awkward.util.numpy.bitwise_and(mask, indexes > stops.reshape((len(node), 1)), out=mask)
if step != -1:
awkward.util.numpy.bitwise_and(mask, awkward.util.numpy.remainder(indexes - starts.reshape((len(node), 1)), step) == 0, out=mask)

newcounts = awkward.util.numpy.count_nonzero(mask, axis=1)
newoffsets = counts2offsets(newcounts.reshape(-1))
node = node.copy(starts=newoffsets[:-1], stops=newoffsets[1:], content=node.content[goodindexes])
newcontent = node.content[(indexes + node._starts.reshape((len(node), 1)))[mask]]

node = node.copy(starts=newoffsets[:-1], stops=newoffsets[1:], content=newcontent)

else:
# the other cases are possible, but complicated; the first sets the form
Expand Down Expand Up @@ -732,6 +753,20 @@ def recurse(x):
else:
return awkward.array.objects.Methods.maybemixin(type(result), JaggedArray).fromcounts(counts, result)

def regular(self):
    """Return the content arranged as a regular (non-jagged) array.

    Every element must have the same count. The result is obtained either
    by slicing and reshaping the content (when ``self._canuseoffset()`` is
    true) or by indexing the content with an array of positions of shape
    ``self._starts.shape + (count,)``.

    An array with no elements at all is treated as regular with a count of
    zero, instead of failing while looking up the first count.

    Returns
    -------
    ``self._content`` reshaped or indexed as described above.

    Raises
    ------
    ValueError
        If different elements have different counts.
    """
    flatcounts = self.counts.reshape(-1)
    nonempty = len(flatcounts) > 0

    if nonempty:
        count = flatcounts[0]
        if not (count == self.counts).all():
            raise ValueError("jagged array is not regular: different elements have different counts")
    else:
        # No elements means there is no "first count" to read; use zero so
        # the index-building path below yields an empty result.
        count = 0

    # The slice shortcut reads self._starts[0] and self._stops[-1], which do
    # not exist for an array without elements, so only take it when nonempty.
    if nonempty and self._canuseoffset():
        out = self._content[self._starts[0]:self._stops[-1]]
        return out.reshape(self._starts.shape + (count,) + self._content.shape[1:])

    # General path: for each start, build the positions start, start + 1, ...,
    # start + count - 1 and pick those items out of the content.
    indexes = awkward.util.numpy.repeat(self._starts, count).reshape(self._starts.shape + (count,))
    indexes += awkward.util.numpy.arange(count)
    return self._content[indexes]

@staticmethod
def aligned(*jaggedarrays):
if not all(isinstance(x, JaggedArray) for x in jaggedarrays):
Expand Down Expand Up @@ -1068,14 +1103,6 @@ def max(self):
else:
return self._minmax_general(False, False)

@classmethod
def regular(cls, content, size=1):
    """Partition ``content`` into consecutive runs of ``size`` items.

    Boundaries are generated every ``size`` positions from 0, and the last
    boundary is replaced by ``len(content)`` so that a trailing partial run
    ends exactly at the end of the content. The boundaries and the content
    are then handed to ``cls.fromoffsets``.
    """
    total = len(content)
    # Ceiling division: how many runs are needed to cover ``total`` items.
    nchunks = -(-total // size)
    boundaries = awkward.util.numpy.arange(0, nchunks * size + 1, size, dtype=awkward.util.INDEXTYPE)
    if len(boundaries) != 0:
        boundaries[-1] = total
    return cls.fromoffsets(boundaries, content)

@classmethod
def concat(cls, first, *rest): # all elements of first followed by all elements of second
arrays = (first,) + rest
Expand Down
32 changes: 32 additions & 0 deletions tests/test_jagged.py
Original file line number Diff line number Diff line change
Expand Up @@ -141,6 +141,13 @@ def test_jagged_fancy(self):
a = JaggedArray([0, 3, 3, 5], [3, 3, 5, 10], [[0.0], [1.1], [2.2], [3.3], [4.4], [5.5], [6.6], [7.7], [8.8], [9.9]])
assert a[[1, 2]].tolist() == [[], [[3.3], [4.4]]]

def test_jagged_subslice(self):
    """Compare inner-dimension slicing with plain Python list slicing.

    Every combination of start, stop and step below is applied as
    ``a[:, start:stop:step]`` and must match slicing each element's list
    with the same slice.
    """
    a = JaggedArray.fromiter([[], [100, 101, 102], [200, 201, 202, 203], [300, 301, 302, 303, 304], [], [500, 501], [600], []])
    bounds = (None, 0, 1, 2, 3, 4, 5, -1, -2, -3, -4, -5, -6)
    steps = (None, 1, 2, 3, 4, 5, -1, -2, -3, -4, -5)
    for start in bounds:
        for stop in bounds:
            for step in steps:
                where = slice(start, stop, step)
                observed = a[:, where].tolist()
                expected = [x.tolist()[where] for x in a]
                assert observed == expected

def test_jagged_jagged(self):
a = JaggedArray.fromoffsets([0, 3, 3, 5], JaggedArray.fromoffsets([0, 3, 3, 8, 10, 10], [0.0, 1.1, 2.2, 3.3, 4.4, 5.5, 6.6, 7.7, 8.8, 9.9]))
assert [a[i].tolist() for i in range(len(a))] == [[[0.0, 1.1, 2.2], [], [3.3, 4.4, 5.5, 6.6, 7.7]], [], [[8.8, 9.9], []]]
Expand Down Expand Up @@ -191,6 +198,31 @@ def test_jagged_ufunc_table(self):
assert (a + numpy.array([100, 200, 300, 400])).tolist() == [[{"x": 100, "y": 100.0}, {"x": 101, "y": 101.1}, {"x": 102, "y": 102.2}], [], [{"x": 303, "y": 303.3}, {"x": 304, "y": 304.4}], [{"x": 405, "y": 405.5}, {"x": 406, "y": 406.6}, {"x": 407, "y": 407.7}, {"x": 408, "y": 408.8}, {"x": 409, "y": 409.9}]]
assert (a + awkward.Table(x=[100, 200, 300, 400], y=[1000, 2000, 3000, 4000])).tolist() == [[{"x": 100, "y": 1000.0}, {"x": 101, "y": 1001.1}, {"x": 102, "y": 1002.2}], [], [{"x": 303, "y": 3003.3}, {"x": 304, "y": 3004.4}], [{"x": 405, "y": 4005.5}, {"x": 406, "y": 4006.6}, {"x": 407, "y": 4007.7}, {"x": 408, "y": 4008.8}, {"x": 409, "y": 4009.9}]]

def test_jagged_regular(self):
    """Check ``regular()`` on arrays whose elements all have three items.

    The cases cover one- and two-dimensional starts/stops, flat content
    and content whose items are one-element lists, and a layout in which
    the content holds an extra item (999) that no start/stop range covers.
    """
    rows = [[0.0, 1.1, 2.2], [3.3, 4.4, 5.5], [6.6, 7.7, 8.8], [9.9, 10.0, 11.0]]
    flat = [value for row in rows for value in row]
    # Same content with an unreferenced item inserted between the 2nd and 3rd rows.
    gapped = flat[:6] + [999] + flat[6:]

    def wrap(values):
        # Turn each item into a one-element list.
        return [[value] for value in values]

    rows_wrapped = [wrap(row) for row in rows]
    pairs = [rows[:2], rows[2:]]
    pairs_wrapped = [rows_wrapped[:2], rows_wrapped[2:]]

    cases = [
        ([0, 3, 6, 9], [3, 6, 9, 12], flat, rows),
        ([0, 3, 6, 9], [3, 6, 9, 12], wrap(flat), rows_wrapped),
        ([[0, 3], [6, 9]], [[3, 6], [9, 12]], flat, pairs),
        ([[0, 3], [6, 9]], [[3, 6], [9, 12]], wrap(flat), pairs_wrapped),
        ([0, 3, 7, 10], [3, 6, 10, 13], gapped, rows),
        ([0, 3, 7, 10], [3, 6, 10, 13], wrap(gapped), rows_wrapped),
        ([[0, 3], [7, 10]], [[3, 6], [10, 13]], gapped, pairs),
        ([[0, 3], [7, 10]], [[3, 6], [10, 13]], wrap(gapped), pairs_wrapped),
    ]
    for starts, stops, content, expected in cases:
        a = JaggedArray(starts, stops, content)
        assert a.regular().tolist() == expected

def test_jagged_cross(self):
    """Placeholder test: makes no assertions."""

Expand Down

0 comments on commit 99a71c0

Please sign in to comment.