Skip to content

Commit

Permalink
ML-1321: when key is number/date and None, skip it (#316)
Browse files Browse the repository at this point in the history
  • Loading branch information
katyakats authored Dec 2, 2021
1 parent f852c63 commit 2a0358a
Show file tree
Hide file tree
Showing 2 changed files with 34 additions and 1 deletion.
2 changes: 1 addition & 1 deletion storey/sources.py
Original file line number Diff line number Diff line change
Expand Up @@ -763,7 +763,7 @@ async def _run_loop(self):
if isinstance(self._key_field, list):
key = []
for key_field in self._key_field:
- if key_field not in body or body[key_field] is None:
+ if key_field not in body or pandas.isna(body[key_field]):
create_event = False
break
key.append(body[key_field])
Expand Down
33 changes: 33 additions & 0 deletions tests/test_flow.py
Original file line number Diff line number Diff line change
Expand Up @@ -2670,6 +2670,39 @@ def test_none_key_is_not_written():
assert result == expected


def test_none_key_num_is_not_written():
    """A row whose numeric key is missing must not reach the end of the flow.

    The frame is indexed by a column holding ``[10, None, 20]``; only the
    rows keyed 10 and 20 are expected in the collected result.
    """
    frame = pd.DataFrame(
        {'index': [10, None, 20], 'some_data': [1, 2, 3]},
    ).set_index(keys=['index'])

    source = DataframeSource(frame, key_field=['index'])
    collector = Reduce([], append_and_return)
    controller = build_flow([source, collector]).run()

    result = controller.await_termination()

    # The middle row (some_data == 2) has no key, so it is absent here.
    expected = [
        {'index': 10, 'some_data': 1},
        {'index': 20, 'some_data': 3},
    ]
    assert result == expected


def test_none_key_date_is_not_written():
    """A row whose datetime key is missing must not reach the end of the flow.

    The frame is indexed by a column holding two datetimes with ``None``
    between them; only the two dated rows are expected in the result.
    """
    first_ts = datetime(2020, 6, 27, 10, 23, 8, 420581)
    second_ts = datetime(2020, 6, 28, 10, 23, 8, 420581)

    frame = pd.DataFrame(
        {'index': [first_ts, None, second_ts], 'some_data': [1, 2, 3]},
    ).set_index(keys=['index'])

    source = DataframeSource(frame, key_field=['index'])
    collector = Reduce([], append_and_return)
    controller = build_flow([source, collector]).run()

    result = controller.await_termination()

    # The middle row (some_data == 2) has no key, so it is absent here.
    expected = [
        {'index': first_ts, 'some_data': 1},
        {'index': second_ts, 'some_data': 3},
    ]
    assert result == expected


def test_csv_none_value_first_row(tmpdir):
out_file_par = f'{tmpdir}/test_csv_none_value_first_row_{uuid.uuid4().hex}.parquet'
out_file_csv = f'{tmpdir}/test_csv_none_value_first_row_{uuid.uuid4().hex}.csv'
Expand Down

0 comments on commit 2a0358a

Please sign in to comment.