From 12ce97ebd21ed0703255246d115efd6aa9046fd0 Mon Sep 17 00:00:00 2001 From: Tim West <35032809+tjw45@users.noreply.github.com> Date: Tue, 18 Feb 2020 08:36:25 +0000 Subject: [PATCH 1/7] Copy data script behave with empty DataFrames. --- arctic/scripts/arctic_copy_data.py | 28 ++++++++++++++++------------ 1 file changed, 16 insertions(+), 12 deletions(-) diff --git a/arctic/scripts/arctic_copy_data.py b/arctic/scripts/arctic_copy_data.py index a997e6e86..5f127acb0 100644 --- a/arctic/scripts/arctic_copy_data.py +++ b/arctic/scripts/arctic_copy_data.py @@ -37,18 +37,22 @@ def _copy_symbol(symbols): if existing_data and splice: original_data = dest.read(symbol).data - preserve_start = to_pandas_closed_closed(DateRange(None, new_data.index[0].to_pydatetime(), - interval=CLOSED_OPEN)).end - preserve_end = to_pandas_closed_closed(DateRange(new_data.index[-1].to_pydatetime(), - None, - interval=OPEN_CLOSED)).start - if not original_data.index.tz: - # No timezone on the original, should we even allow this? - preserve_start = preserve_start.replace(tzinfo=None) - preserve_end = preserve_end.replace(tzinfo=None) - before = original_data.loc[:preserve_start] - after = original_data[preserve_end:] - new_data = before.append(new_data).append(after) + + if new_data is None or len(new_data) == 0: + new_data = original_data + else: + preserve_start = to_pandas_closed_closed(DateRange(None, new_data.index[0].to_pydatetime(), + interval=CLOSED_OPEN)).end + preserve_end = to_pandas_closed_closed(DateRange(new_data.index[-1].to_pydatetime(), + None, + interval=OPEN_CLOSED)).start + if not original_data.index.tz: + # No timezone on the original, should we even allow this? + preserve_start = preserve_start.replace(tzinfo=None) + preserve_end = preserve_end.replace(tzinfo=None) + before = original_data.loc[:preserve_start] + after = original_data[preserve_end:] + new_data = before.append(new_data).append(after) mt.write(symbol, new_data, metadata=version.metadata) return _copy_symbol From 8c2ece31c06fdbd60481a66d6611df3e03f09ed9 Mon Sep 17 00:00:00 2001 From: Tim West <35032809+tjw45@users.noreply.github.com> Date: Tue, 18 Feb 2020 08:38:39 +0000 Subject: [PATCH 2/7] Transaction and Arctic writes to behave the same --- arctic/store/audit.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/arctic/store/audit.py b/arctic/store/audit.py index 2fd6e2f96..b5da7b857 100644 --- a/arctic/store/audit.py +++ b/arctic/store/audit.py @@ -120,10 +120,9 @@ def write(self, symbol, data, prune_previous_version=True, metadata=None, **kwar Records a write request to be actioned on context exit. Takes exactly the same parameters as the regular library write call. """ - if data is not None: - # We only write data if existing data is None or the Timeseries data has changed or metadata has changed - if self.base_ts.data is None or not are_equals(data, self.base_ts.data) or metadata != self.base_ts.metadata: - self._do_write = True + # We only write data if existing data is None or the Timeseries data has changed or metadata has changed + if self.base_ts.data is None or not are_equals(data, self.base_ts.data) or metadata != self.base_ts.metadata: + self._do_write = True self._write = partial(self._version_store.write, symbol, data, prune_previous_version=prune_previous_version, metadata=metadata, **kwargs) From 8a270200d777d1f6386fe29cba1e1a49a02deebc Mon Sep 17 00:00:00 2001 From: Tim West <35032809+tjw45@users.noreply.github.com> Date: Tue, 18 Feb 2020 08:41:21 +0000 Subject: [PATCH 3/7] Test for copy-splice empty data --- tests/integration/scripts/test_copy_data.py | 23 +++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/tests/integration/scripts/test_copy_data.py b/tests/integration/scripts/test_copy_data.py index c3b5da4aa..219ae3f00 100644 --- a/tests/integration/scripts/test_copy_data.py +++ b/tests/integration/scripts/test_copy_data.py @@ -141,3 +141,26 @@ def test_copy_data_doesnt_exist(arctic, mongo_host): assert logger.info.call_args_list == [call('Copying data from %s -> %s' % (src_host, dest_host)), call('Copying: 0 symbols')] assert logger.warn.call_args_list == [call('No symbols found that matched those provided.')] + +@pytest.mark.parametrize('source_ts', [read_str_as_pandas(""" times | near"""), None]) +def test_copy_empty_data(arctic, mongo_host, source_ts): + src = 'user.library' + dest = 'user.library2' + # Put ts, ts1 in library + arctic[src].write('some_ts', source_ts) + + # Put some other value for ts in library2 + arctic[dest].write('some_ts', ts) + + # Create the user against the current mongo database + src_host = src + '@' + mongo_host + dest_host = dest + '@' + mongo_host + with patch('arctic.scripts.arctic_copy_data.logger') as logger: + run_as_main(mcd.main, '--src', src_host, '--dest', dest_host, '--log', 'CR101', '--splice', 'some_ts') + + assert_frame_equal(ts, arctic[dest].read('some_ts').data) + assert logger.info.call_args_list == [call('Copying data from %s -> %s' % (src_host, dest_host)), + call('Copying: 1 symbols')] + assert logger.warn.call_args_list == [call('Symbol: some_ts already exists in destination, splicing in new data')] + + assert arctic[dest].read_audit_log('some_ts')[0]['message'] == 'CR101' From 94541dc086c97f9d2ab2cfa35a42eaf94126cdb8 Mon Sep 17 00:00:00 2001 From: Tim West <35032809+tjw45@users.noreply.github.com> Date: Tue, 18 Feb 2020 08:46:20 +0000 Subject: [PATCH 4/7] Test writing None in transactions. --- tests/unit/store/test_version_store_audit.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/tests/unit/store/test_version_store_audit.py b/tests/unit/store/test_version_store_audit.py index 6ecadc731..33a07b448 100644 --- a/tests/unit/store/test_version_store_audit.py +++ b/tests/unit/store/test_version_store_audit.py @@ -77,7 +77,6 @@ def test_ArcticTransaction_writes_if_metadata_changed(): def test_ArcticTransaction_writes_if_base_data_corrupted(): - vs = Mock(spec=VersionStore) ts1 = pd.DataFrame(index=[1, 2], data={'a': [1.0, 2.0]}) vs.read.side_effect = OperationFailure('some failure') @@ -109,7 +108,7 @@ def test_ArcticTransaction_writes_no_data_found(): assert vs.write.call_args_list == [call(sentinel.symbol, ANY, prune_previous_version=True, metadata={1: 2})] assert vs.list_versions.call_args_list == [call(sentinel.symbol, latest_only=True), - call(sentinel.symbol)] + call(sentinel.symbol)] def test_ArcticTransaction_writes_no_data_found_deleted(): @@ -146,7 +145,7 @@ def test_ArcticTransaction_does_nothing_when_data_not_modified(): assert not vs.write.called -def test_ArcticTransaction_does_nothing_when_data_is_None(): +def test_ArcticTransaction_does_write_when_new_data_is_None(): vs = Mock(spec=VersionStore) ts1 = pd.DataFrame(index=[1, 2], data={'a': [1.0, 2.0]}) vs.read.return_value = VersionedItem(symbol=sentinel.symbol, library=sentinel.library, version=1, metadata=None, @@ -156,9 +155,10 @@ def test_ArcticTransaction_does_nothing_when_data_is_None(): vs.list_versions.return_value = [{'version': 1}, {'version': 2}] with ArcticTransaction(vs, sentinel.symbol, sentinel.user, sentinel.log) as cwb: - pass + cwb.write(sentinel.symbol, None, metadata{1: 2}) + assert not vs._delete_version.called - assert not vs.write.called + assert vs.write.call_args_list == [call(sentinel.symbol, None, prune_previous_version=True, metadata={1: 2})] def test_ArcticTransaction_guards_against_inconsistent_ts(): From 9ca8c1006e7814388aa0445dde62f40fca99d2ca Mon Sep 17 00:00:00 2001 From: Tim West <35032809+tjw45@users.noreply.github.com> Date: Tue, 18 Feb 2020 09:09:39 +0000 Subject: [PATCH 5/7] Update test_version_store_audit.py --- tests/unit/store/test_version_store_audit.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/unit/store/test_version_store_audit.py b/tests/unit/store/test_version_store_audit.py index 33a07b448..2cd41379b 100644 --- a/tests/unit/store/test_version_store_audit.py +++ b/tests/unit/store/test_version_store_audit.py @@ -155,7 +155,7 @@ def test_ArcticTransaction_does_write_when_new_data_is_None(): vs.list_versions.return_value = [{'version': 1}, {'version': 2}] with ArcticTransaction(vs, sentinel.symbol, sentinel.user, sentinel.log) as cwb: - cwb.write(sentinel.symbol, None, metadata{1: 2}) + cwb.write(sentinel.symbol, None, metadata={1: 2}) assert not vs._delete_version.called assert vs.write.call_args_list == [call(sentinel.symbol, None, prune_previous_version=True, metadata={1: 2})] From 7e96d200e0bbd1b19ac68e63c902a01698b58c1f Mon Sep 17 00:00:00 2001 From: Tim West <35032809+tjw45@users.noreply.github.com> Date: Tue, 18 Feb 2020 12:34:02 +0000 Subject: [PATCH 6/7] Version numbers the wrong way round in test --- tests/unit/store/test_version_store_audit.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/unit/store/test_version_store_audit.py b/tests/unit/store/test_version_store_audit.py index 2cd41379b..6ceee280e 100644 --- a/tests/unit/store/test_version_store_audit.py +++ b/tests/unit/store/test_version_store_audit.py @@ -152,7 +152,7 @@ def test_ArcticTransaction_does_write_when_new_data_is_None(): data=ts1, host=sentinel.host) vs.write.return_value = VersionedItem(symbol=sentinel.symbol, library=sentinel.library, version=2, metadata=None, data=None, host=sentinel.host) - vs.list_versions.return_value = [{'version': 1}, {'version': 2}] + vs.list_versions.return_value = [{'version': 2}, {'version': 1}] with ArcticTransaction(vs, sentinel.symbol, sentinel.user, sentinel.log) as cwb: cwb.write(sentinel.symbol, None, metadata={1: 2}) From cf7d59be5c63a5fa78f8f1ed628b290712c47c89 Mon Sep 17 00:00:00 2001 From: Tim West <35032809+tjw45@users.noreply.github.com> Date: Tue, 18 Feb 2020 12:40:33 +0000 Subject: [PATCH 7/7] Update test_copy_data.py --- tests/integration/scripts/test_copy_data.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/integration/scripts/test_copy_data.py b/tests/integration/scripts/test_copy_data.py index 219ae3f00..a92d90b77 100644 --- a/tests/integration/scripts/test_copy_data.py +++ b/tests/integration/scripts/test_copy_data.py @@ -163,4 +163,5 @@ def test_copy_empty_data(arctic, mongo_host, source_ts): call('Copying: 1 symbols')] assert logger.warn.call_args_list == [call('Symbol: some_ts already exists in destination, splicing in new data')] - assert arctic[dest].read_audit_log('some_ts')[0]['message'] == 'CR101' + # As the destination data is unchanged, no writing takes place and the audit log is empty. + assert len(arctic[dest].read_audit_log('some_ts')) == 0