diff --git a/requirements.txt b/requirements.txt index 14a10bb..9367bdd 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,6 +1,5 @@ asynctest==0.13.0 -aurum==0.1.14 -catalysis-client==1.0.1 +aurum==0.1.15 freezegun==0.3.15 numpy==1.18.1 pandas==1.0.1 diff --git a/setup.py b/setup.py index 8aac37f..44fa02b 100644 --- a/setup.py +++ b/setup.py @@ -32,7 +32,7 @@ author='Adriano Marques, Nathan Martins, Thales Ribeiro', author_email='adriano@xnv.io, nathan@xnv.io, thales@xnv.io', python_requires='>=3.7.0', - install_requires=['catalysis-client', 'numpy'], + install_requires=['aurum', 'numpy'], include_package_data=True, license="GNU LGPLv3", url='https://github.com/exponential-ventures/stripping', diff --git a/stripping/__init__.py b/stripping/__init__.py index 9a7d2d7..189d2c9 100755 --- a/stripping/__init__.py +++ b/stripping/__init__.py @@ -24,14 +24,22 @@ import os -from catalysis.common.configuration import ClientConfiguration - from .cache import StepCache from .executor import Stripping, Context from .logging import Logging logging = Logging().get_logger() +try: + from catalysis.common.configuration import ClientConfiguration + + has_catalysis = True +except ImportError as error: + has_catalysis = False + logging.warn(f"Not using Catalysis: {str(error)}") +except Exception as error: + pass + def setup_stripping(cache_dir: str = None): if cache_dir is None: @@ -45,6 +53,8 @@ def setup_stripping(cache_dir: str = None): def setup_stripping_with_catalysis(catalysis_credential_name: str, cache_dir: str = None): + if not has_catalysis: + raise RuntimeError("Catalysis is not available") if cache_dir is None: cache_dir = fetch_catalysis_default_location(catalysis_credential_name) @@ -54,6 +64,8 @@ def setup_stripping_with_catalysis(catalysis_credential_name: str, cache_dir: st def fetch_catalysis_default_location(catalysis_credential_name: str): + if not has_catalysis: + raise RuntimeError("Catalysis is not available") credential = ClientConfiguration().get_credential(catalysis_credential_name) if 'path' not in credential.keys(): raise RuntimeError( diff --git a/stripping/cache.py b/stripping/cache.py index 18bdcad..a2b6afd 100755 --- a/stripping/cache.py +++ b/stripping/cache.py @@ -34,7 +34,16 @@ from .exceptions import StepNotCached from .singleton import SingletonDecorator from .storage import CacheStorage -from catalysis.storage.storage_client import StorageClient + +try: + from catalysis.storage.storage_client import StorageClient + + has_catalysis = True +except ImportError as error: + has_catalysis = False + logging.warn(f"Not using Catalysis: {str(error)}") +except Exception as error: + pass ACCESS = 'access' DIR_PATH = 'path' @@ -89,7 +98,7 @@ def __init__(self, catalysis_credential_name: str = ''): self.__cached_dirs = {} self.catalysis_client = None - if catalysis_credential_name != '': + if has_catalysis and catalysis_credential_name != '': self.catalysis_client = StorageClient(catalysis_credential_name) def add_dir(self, cache_dir): @@ -107,8 +116,7 @@ async def force_delete(self, cache_dir): logging.info(' {} deleted'.format(cache_dir)) if cache_dir in self.__cached_dirs: - del(self.__cached_dirs[cache_dir]) - + del (self.__cached_dirs[cache_dir]) async def strategy(self): """ @@ -123,7 +131,7 @@ async def strategy(self): self.__cached_dirs[d] = {} for dir_path in glob('{}/*'.format(d)): self.__cached_dirs[d][dir_path] = {} - self.__cached_dirs[d][dir_path][ACCESS] = await self.__last_access( dir_path) + self.__cached_dirs[d][dir_path][ACCESS] = await self.__last_access(dir_path) if self.__cached_dirs[d][dir_path][ACCESS] <= three_months_ago_timestamp: await self.force_delete(dir_path) await asyncio.sleep(0.2) @@ -164,4 +172,3 @@ async def percentage_disk_free_space(self): total = stats.f_frsize * stats.f_blocks free = stats.f_frsize * stats.f_bavail return (free / total) * 100 - diff --git a/stripping/elemental/base.py b/stripping/elemental/base.py index a06e1eb..ad152a2 100644 --- a/stripping/elemental/base.py +++ b/stripping/elemental/base.py @@ -23,10 +23,18 @@ import json -from pandas.core.frame import DataFrame +import logging from datetime import datetime -from catalysis.storage import StorageClient +try: + from catalysis.storage import StorageClient + + has_catalysis = True +except ImportError as error: + has_catalysis = False + logging.warn(f"Not using Catalysis: {str(error)}") +except Exception as error: + pass STOUT = 'stdout' FILE = 'file' @@ -52,8 +60,7 @@ def column_selection(self, columns: list) -> None: def filters(self, *filters): self.__filters = filters - def report(self, report_name, path="/tmp/elemental_report.txt", report_type=STOUT, - catalysis_client: StorageClient = None): + def report(self, report_name, path="/tmp/elemental_report.txt", report_type=STOUT, catalysis_client=None): self._path = path self._report_type = report_type self._report_name = report_name @@ -88,7 +95,7 @@ def _elemental_report(self) -> None: f.write(self._generate_json_report(self._report_name)) else: f.write(self._generate_report(self._report_name)) - else: + elif has_catalysis: with self._catalysis_client.open(self._path, '+w') as f: if self._report_type == JSON: f.write(self._generate_json_report(self._report_name)) diff --git a/stripping/executor.py b/stripping/executor.py index 26f360e..abebd39 100755 --- a/stripping/executor.py +++ b/stripping/executor.py @@ -1,24 +1,18 @@ -#!/usr/bin/env python3 +#!/usr/bin/env python +# -*- coding: utf-8 -*- ## -## Authors: Adriano Marques -## Nathan Martins -## Thales Ribeiro +## ---------------- +## | | +## | CONFIDENTIAL | +## | | +## ---------------- ## -## Copyright (C) 2019 Exponential Ventures LLC +## Copyright Exponential Ventures LLC (C), 2019 All Rights Reserved ## -## This library is free software; you can redistribute it and/or -## modify it under the terms of the GNU Library General Public -## License as published by the Free Software Foundation; either -## version 2 of the License, or (at your option) any later version. +## Author: Adriano Marques ## -## This library is distributed in the hope that it will be useful, -## but WITHOUT ANY WARRANTY; without even the implied warranty of -## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -## Library General Public License for more details. -## -## You should have received a copy of the GNU Library General Public -## License along with this library; if not, write to the Free Software -## Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA +## If you do not have a written authorization to read this code +## PERMANENTLY REMOVE IT FROM YOUR SYSTEM IMMEDIATELY. ## @@ -32,7 +26,6 @@ import numpy as np import pandas as pd -from catalysis.storage import StorageClient from .cache import StepCache from .singleton import SingletonDecorator @@ -46,6 +39,17 @@ except Exception as error: pass +try: + from catalysis.storage import StorageClient + + has_catalysis = True + +except ImportError as error: + has_catalysis = False + logging.warn(f"Not using Catalysis: {str(error)}") +except Exception as error: + pass + @SingletonDecorator class Context: @@ -70,14 +74,13 @@ def __getattr__(self, attr_name): else: if os.path.exists(attr_file_name): - res = self._deserialize(attr_file_name) - setattr(self, attr_name, res) - return res + self._deserialize(attr_file_name) + return getattr(self, attr_name) + logging.warning(f"Attribute '{attr_name}' was not found.") raise AttributeError(f"Attribute '{attr_name}' was not found.") def serialize(self) -> None: - for attr in dir(self): if attr.startswith("_") or attr == 'self': continue @@ -86,7 +89,7 @@ def serialize(self) -> None: if inspect.ismethod(attribute): continue - if isinstance(attribute, StorageClient): + if has_catalysis and isinstance(attribute, StorageClient): continue context_file_name = os.path.join(self.__context_location, attr) @@ -103,20 +106,13 @@ def serialize(self) -> None: logging.debug(f"Context Attribute '{attr}' is a python object of type '{type(attribute)}'.") attr_file.write(pickle.dumps(attribute)) else: - - if isinstance(attribute, pd.DataFrame): - logging.debug(f"Context Attribute '{attr}' is a Pandas DataFrame") - attribute.to_pickle(context_file_name) - else: - - with open(context_file_name, 'wb') as attr_file: - if isinstance(attribute, np.ndarray): - logging.debug(f"Context Attribute '{attr}' is a numpy array.") - np.save(attr_file, attribute) - else: - logging.debug( - f" Context Attribute '{attr}' is a python object of type '{type(attribute)}'.") - pickle.dump(attribute, attr_file) + with open(context_file_name, 'wb') as attr_file: + if isinstance(attribute, np.ndarray): + logging.debug(f"Context Attribute '{attr}' is a numpy array.") + np.save(attr_file, attribute) + else: + logging.debug(f" Context Attribute '{attr}' is a python object of type '{type(attribute)}'.") + pickle.dump(attribute, attr_file) def deserialize(self) -> None: @@ -129,10 +125,9 @@ def deserialize(self) -> None: self._deserialize(os.path.join(self.__context_location, attr_file_name)) def _deserialize(self, attr_file_name): - logging.debug(f"Deserializing context attribute from '{attr_file_name}'") + logging.info(f"Deserializing context attribute from '{attr_file_name}'") # TODO Refactor this to be more elegant - # TODO Add 'pd.read_pickle' support if self.catalysis_client is not None: with self.catalysis_client.open(attr_file_name, 'rb') as attr_file: try: @@ -146,26 +141,17 @@ def _deserialize(self, attr_file_name): setattr(self, attr_file_name, np.load(attr_file)) logging.debug(f"Successfully deserialized '{attr_file_name}' as a numpy array.") else: - - try: - return pd.read_pickle(attr_file_name) - except: - - deserializing_methods = [ - pickle.load, - np.load, - ] - - with open(attr_file_name, 'rb') as attr_file: - - for m in deserializing_methods: - - try: - return m(attr_file) - except: - pass - - raise AttributeError(f"Unable to deserialize {attr_file}") + with open(attr_file_name, 'rb') as attr_file: + try: + logging.debug(f"Attempting to deserialize '{attr_file_name}' with pickle...") + setattr(self, attr_file_name, pickle.load(attr_file)) + logging.debug( + f"Successfully deserialized '{attr_file_name}' as a python object of " + f"type '{type(getattr(self, attr_file_name))}'") + except Exception: + logging.debug(f"Attempting to deserialize '{attr_file_name}' with numpy...") + setattr(self, attr_file_name, np.load(attr_file)) + logging.debug(f"Successfully deserialized '{attr_file_name}' as a numpy array.") @SingletonDecorator @@ -259,20 +245,7 @@ def get_chained_step(self, current_step): @staticmethod def commit_aurum(step_name: str) -> None: - if 'aurum' in sys.modules.keys(): - - try: - au.base.git.add_dirs(['.']) - au.base.git.commit( - commit_message=f"Auto commit step:{step_name}", - ) - logging.info(f"step {step_name} has been committed in the repository") - except au.base.git.GitCommandError as e: - logging.warning(f"failed to commit to local repository: {e}") - return - - try: - au.base.git.push() - logging.info(f"step {step_name} has been saved in the remote repository") - except au.base.git.GitCommandError as e: - logging.warning(f"failed to push to remote repository: {e}") + if 'au' in sys.modules: + au.base.git.commit(step_name) + au.base.git.push() + logging.info(f"step {step_name} has been saved in the Aurum's repository") diff --git a/stripping/storage.py b/stripping/storage.py index 2e887c3..d4a0a5e 100755 --- a/stripping/storage.py +++ b/stripping/storage.py @@ -32,7 +32,12 @@ from tempfile import TemporaryFile from typing import Iterable -from catalysis.storage import StorageClient +try: + from catalysis.storage import StorageClient + + has_catalysis = True +except ImportError: + has_catalysis = False from .exceptions import StepNotCached @@ -46,7 +51,7 @@ class CacheStorage: def __init__(self, cache_dir: str, catalysis_credential_name: str = '') -> None: self.cache_dir = cache_dir - if catalysis_credential_name != '': + if has_catalysis and catalysis_credential_name != '': self.catalysis_client = StorageClient(catalysis_credential_name) else: self.catalysis_client = None diff --git a/tests/test_storage_with_catalysis.py b/tests/storage_with_catalysis.py similarity index 100% rename from tests/test_storage_with_catalysis.py rename to tests/storage_with_catalysis.py diff --git a/tests/test_attr_file_deserializing.py b/tests/test_attr_file_deserializing.py index 526e372..d4b5a9c 100644 --- a/tests/test_attr_file_deserializing.py +++ b/tests/test_attr_file_deserializing.py @@ -45,31 +45,29 @@ def setUp(self) -> None: def test_accessing_serialized_attrs(self): copyfile(os.path.join(current_dir, "pre_attr_access.py"), os.path.join(current_dir, "attr_access.py")) - proc = subprocess.Popen( + proc = subprocess.run( ["python", "attr_access.py"], stdout=subprocess.PIPE, - stderr=subprocess.PIPE, + stderr=subprocess.STDOUT, shell=False, cwd=current_dir, ) - _, error = proc.communicate() - self.assertEqual(error, b'') + + self.assertEqual(proc.stderr, None) copyfile(os.path.join(current_dir, "post_attr_access.py"), os.path.join(current_dir, "attr_access.py")) - proc = subprocess.Popen( + proc = subprocess.run( ["python", "attr_access.py"], stdout=subprocess.PIPE, - stderr=subprocess.PIPE, + stderr=subprocess.STDOUT, shell=False, cwd=current_dir, ) - out, error = proc.communicate() - - if error != b'': - print(error.decode()) + if proc.stderr and proc.stderr != b'': + print(proc.stderr.decode()) else: - print(out.decode()) + print(proc.stdout.decode()) - self.assertEqual(error, b'') + self.assertEqual(proc.stderr, None) diff --git a/tests/test_aurum_auto_commit.py b/tests/test_aurum_auto_commit.py index 22797a4..1aad55c 100644 --- a/tests/test_aurum_auto_commit.py +++ b/tests/test_aurum_auto_commit.py @@ -64,7 +64,7 @@ def tearDown(self): def test_step_decorator_with_commits(self): set_git_for_test() comp_proc = subprocess.run( - ["au -v init"], + ["au --verbose init"], shell=True, capture_output=True, encoding="utf-8", @@ -80,7 +80,7 @@ def test_step_decorator_with_commits(self): ) self.assertEqual(comp_proc.returncode, 0, msg=comp_proc.stderr) - self.assertEqual(comp_proc.stdout, "1\n") + self.assertEqual(comp_proc.stdout, "2\n") @self.st.step(skip_cache=False) def test_cache(): diff --git a/tests/test_default_cache_location.py b/tests/test_default_cache_location.py index 25fff11..c6ef3f0 100644 --- a/tests/test_default_cache_location.py +++ b/tests/test_default_cache_location.py @@ -26,6 +26,13 @@ from stripping import setup_stripping, setup_stripping_with_catalysis +try: + import catalysis + + has_catalysis = True +except ImportError: + has_catalysis = False + class DefaultCacheLocationTestCase(TestCase): @@ -37,8 +44,12 @@ def test_default_location_wo_catalysis(self): ) def test_default_location_with_catalysis(self): - st, _ = setup_stripping_with_catalysis(catalysis_credential_name="local") - self.assertEqual( - "/tmp/list/", - st.cache.cache_dir, - ) + if has_catalysis: + st, _ = setup_stripping_with_catalysis(catalysis_credential_name="local") + self.assertEqual( + "/tmp/list/", + st.cache.cache_dir, + ) + else: + with self.assertRaises(RuntimeError): + setup_stripping_with_catalysis(catalysis_credential_name="local") \ No newline at end of file