From 71a54ffb648f46ab7fb331c9464c7621fd820bb6 Mon Sep 17 00:00:00 2001 From: RollerKnobster Date: Mon, 24 Jun 2024 13:38:39 +0300 Subject: [PATCH 01/52] Fix(requirements): bump dependencies, mainly to eliminate third-party security issues * Bump `TensorFlow 2.12.1->2.16.1` * Bump `catboost 1.2.2->1.2.5` * Bump `mlflow 2.9.2->2.12.2` * Bump `azureml-core 1.49.0->1.56.0` * Remove locked `gunicorn` and `packaging` --- requirements/full_requirements.txt | 258 +++++++++++++--------------- requirements/mlflow_requirements.in | 4 +- requirements/requirements.in | 7 +- 3 files changed, 124 insertions(+), 145 deletions(-) diff --git a/requirements/full_requirements.txt b/requirements/full_requirements.txt index d3ce099fd..e6a174b8f 100644 --- a/requirements/full_requirements.txt +++ b/requirements/full_requirements.txt @@ -1,11 +1,12 @@ # -# This file is autogenerated by pip-compile with Python 3.11 +# This file is autogenerated by pip-compile with Python 3.10 # by the following command: # -# pip-compile --no-emit-index-url --output-file=full_requirements.txt mlflow_requirements.in postgres_requirements.in requirements.in +# pip-compile --output-file=full_requirements.txt mlflow_requirements.in postgres_requirements.in requirements.in # absl-py==2.1.0 # via + # keras # tensorboard # tensorflow adal==1.2.7 @@ -14,7 +15,9 @@ adal==1.2.7 # msrestazure alembic==1.13.1 # via mlflow -argcomplete==3.2.1 +aniso8601==9.0.1 + # via graphene +argcomplete==3.4.0 # via # azureml-core # knack @@ -30,7 +33,7 @@ azure-common==1.1.28 # azure-mgmt-resource # azure-mgmt-storage # azureml-core -azure-core==1.29.1 +azure-core==1.30.2 # via # azure-mgmt-core # azureml-core @@ -51,29 +54,27 @@ azure-mgmt-core==1.4.0 # azure-mgmt-storage azure-mgmt-keyvault==10.3.0 # via azureml-core -azure-mgmt-network==25.1.0 +azure-mgmt-network==25.4.0 # via azureml-core -azure-mgmt-resource==23.0.1 +azure-mgmt-resource==23.1.1 # via azureml-core -azure-mgmt-storage==21.1.0 +azure-mgmt-storage==21.2.0 # via 
azureml-core -azureml-core==1.54.0.post1 +azureml-core==1.56.0 # via -r mlflow_requirements.in backports-tempfile==1.0 # via azureml-core backports-weakref==1.0.post1 # via backports-tempfile -bcrypt==4.1.2 +bcrypt==4.1.3 # via paramiko -blinker==1.7.0 +blinker==1.8.2 # via flask -cachetools==5.3.2 - # via - # google-auth - # gordo-core -catboost==1.2.2 +cachetools==5.3.3 + # via gordo-core +catboost==1.2.5 # via -r requirements.in -certifi==2023.11.17 +certifi==2024.6.2 # via # msrest # requests @@ -85,7 +86,6 @@ charset-normalizer==3.3.2 # via requests click==8.1.7 # via - # databricks-cli # flask # gordo-client # mlflow @@ -93,9 +93,9 @@ cloudpickle==3.0.0 # via mlflow contextlib2==21.6.0 # via azureml-core -contourpy==1.2.0 +contourpy==1.2.1 # via matplotlib -cryptography==41.0.7 +cryptography==42.0.8 # via # adal # msal @@ -105,13 +105,11 @@ cryptography==41.0.7 # secretstorage cycler==0.12.1 # via matplotlib -databricks-cli==0.18.0 - # via mlflow -dataclasses-json==0.6.3 +dataclasses-json==0.6.7 # via -r requirements.in dictdiffer==0.9.0 # via -r requirements.in -docker==6.1.3 +docker==7.1.0 # via # azureml-core # mlflow @@ -121,51 +119,50 @@ flask==2.3.3 # via # -r requirements.in # mlflow -flatbuffers==23.5.26 +flatbuffers==24.3.25 # via tensorflow -fonttools==4.47.2 +fonttools==4.53.0 # via matplotlib -gast==0.4.0 +gast==0.5.4 # via tensorflow gitdb==4.0.11 # via gitpython -gitpython==3.1.41 +gitpython==3.1.43 # via mlflow -google-auth==2.26.2 - # via - # google-auth-oauthlib - # tensorboard -google-auth-oauthlib==1.0.0 - # via tensorboard google-pasta==0.2.0 # via tensorflow gordo-client==6.2.8 # via -r requirements.in gordo-core==0.3.5 # via gordo-client -graphviz==0.20.1 +graphene==3.3 + # via mlflow +graphql-core==3.2.3 + # via + # graphene + # graphql-relay +graphql-relay==3.2.0 + # via graphene +graphviz==0.20.3 # via catboost -greenlet==3.0.3 - # via sqlalchemy -grpcio==1.60.0 +grpcio==1.64.1 # via # tensorboard # tensorflow -gunicorn==20.1.0 - # 
via - # -r requirements.in - # mlflow -h5py==3.10.0 +gunicorn==22.0.0 + # via mlflow +h5py==3.11.0 # via # -r requirements.in + # keras # tensorflow humanfriendly==10.0 # via azureml-core -idna==3.6 +idna==3.7 # via requests -importlib-metadata==7.0.1 +importlib-metadata==7.2.1 # via mlflow -influxdb==5.3.1 +influxdb==5.3.2 # via gordo-core isodate==0.6.1 # via @@ -176,13 +173,11 @@ isodate==0.6.1 # azure-mgmt-resource # azure-mgmt-storage # msrest -itsdangerous==2.1.2 +itsdangerous==2.2.0 # via flask -jax==0.4.23 - # via tensorflow jeepney==0.8.0 # via secretstorage -jinja2==3.1.3 +jinja2==3.1.4 # via # -r requirements.in # flask @@ -191,46 +186,52 @@ jmespath==1.0.1 # via # azureml-core # knack -joblib==1.3.2 +joblib==1.4.2 # via scikit-learn -jsonpickle==3.0.2 +jsonpickle==3.2.2 # via azureml-core -keras==2.12.0 +keras==3.3.3 # via tensorflow kiwisolver==1.4.5 # via matplotlib knack==0.11.0 # via azureml-core -libclang==16.0.6 +libclang==18.1.1 # via tensorflow -mako==1.3.1 +mako==1.3.5 # via alembic -markdown==3.5.2 +markdown==3.6 # via # mlflow # tensorboard -markupsafe==2.1.4 +markdown-it-py==3.0.0 + # via rich +markupsafe==2.1.5 # via # jinja2 # mako # werkzeug -marshmallow==3.20.2 +marshmallow==3.21.3 # via dataclasses-json -matplotlib==3.8.2 +matplotlib==3.9.0 # via # catboost # mlflow +mdurl==0.1.2 + # via markdown-it-py ml-dtypes==0.3.2 - # via jax -mlflow==2.9.2 + # via + # keras + # tensorflow +mlflow==2.12.2 # via -r mlflow_requirements.in -msal==1.26.0 +msal==1.29.0 # via # azureml-core # msal-extensions -msal-extensions==1.0.0 +msal-extensions==1.2.0 # via azureml-core -msgpack==1.0.7 +msgpack==1.0.8 # via influxdb msrest==0.7.1 # via @@ -243,17 +244,19 @@ msrestazure==0.6.4 # azureml-core mypy-extensions==1.0.0 # via typing-inspect +namex==0.0.8 + # via keras ndg-httpsclient==0.5.1 # via azureml-core -numexpr==2.8.8 +numexpr==2.10.1 # via gordo-core -numpy==1.24.3 +numpy==1.26.4 # via # catboost # contourpy # gordo-core # h5py - # jax + # keras # 
matplotlib # ml-dtypes # mlflow @@ -267,18 +270,15 @@ numpy==1.24.3 # tensorflow # xarray oauthlib==3.2.2 - # via - # databricks-cli - # requests-oauthlib + # via requests-oauthlib opt-einsum==3.3.0 + # via tensorflow +optree==0.11.0 + # via keras +packaging==24.1 # via - # jax - # tensorflow -packaging==21.3 - # via - # -r requirements.in # azureml-core - # docker + # gunicorn # knack # marshmallow # matplotlib @@ -296,19 +296,19 @@ paramiko==3.4.0 # via azureml-core pathspec==0.12.1 # via azureml-core -peewee==3.17.0 +peewee==3.17.5 # via -r postgres_requirements.in -pillow==10.2.0 +pillow==10.3.0 # via matplotlib -pkginfo==1.9.6 +pkginfo==1.11.1 # via azureml-core -plotly==5.18.0 +plotly==5.22.0 # via catboost -portalocker==2.8.2 +portalocker==2.10.0 # via msal-extensions -prometheus-client==0.19.0 +prometheus-client==0.20.0 # via -r requirements.in -protobuf==4.25.2 +protobuf==4.25.3 # via # mlflow # tensorboard @@ -319,38 +319,32 @@ pyarrow==14.0.2 # via # gordo-core # mlflow -pyasn1==0.5.1 - # via - # ndg-httpsclient - # pyasn1-modules - # rsa -pyasn1-modules==0.3.0 - # via google-auth -pycparser==2.21 +pyasn1==0.6.0 + # via ndg-httpsclient +pycparser==2.22 # via cffi -pydantic==1.10.14 +pydantic==1.10.17 # via gordo-client -pygments==2.17.2 - # via knack +pygments==2.18.0 + # via + # knack + # rich pyjwt[crypto]==2.8.0 # via # adal # azureml-core - # databricks-cli # msal pynacl==1.5.0 # via paramiko -pyopenssl==23.3.0 +pyopenssl==24.1.0 # via # azureml-core # ndg-httpsclient -pyparsing==3.1.1 - # via - # matplotlib - # packaging +pyparsing==3.1.2 + # via matplotlib pysocks==1.7.1 # via requests -python-dateutil==2.8.2 +python-dateutil==2.9.0.post0 # via # -r requirements.in # adal @@ -358,7 +352,7 @@ python-dateutil==2.8.2 # influxdb # matplotlib # pandas -pytz==2023.3.post1 +pytz==2024.1 # via # azureml-core # influxdb @@ -371,12 +365,11 @@ pyyaml==6.0.1 # mlflow querystring-parser==1.2.4 # via mlflow -requests[socks]==2.31.0 +requests[socks]==2.32.3 # via 
# adal # azure-core # azureml-core - # databricks-cli # docker # gordo-client # influxdb @@ -384,22 +377,19 @@ requests[socks]==2.31.0 # msal # msrest # requests-oauthlib - # tensorboard -requests-oauthlib==1.3.1 - # via - # google-auth-oauthlib - # msrest -rsa==4.9 - # via google-auth -scikit-learn==1.4.0 + # tensorflow +requests-oauthlib==2.0.0 + # via msrest +rich==13.7.1 + # via keras +scikit-learn==1.5.0 # via # gordo-core # mlflow -scipy==1.12.0 +scipy==1.13.1 # via # catboost # gordo-core - # jax # mlflow # scikit-learn secretstorage==3.3.3 @@ -413,75 +403,67 @@ six==1.16.0 # astunparse # azure-core # catboost - # databricks-cli # google-pasta # influxdb # isodate # msrestazure # python-dateutil # querystring-parser + # tensorboard # tensorflow smmap==5.0.1 # via gitdb -sqlalchemy==2.0.24 +sqlalchemy==2.0.31 # via # alembic # mlflow -sqlparse==0.4.4 +sqlparse==0.5.0 # via mlflow tabulate==0.9.0 - # via - # databricks-cli - # knack -tenacity==8.2.3 + # via knack +tenacity==8.4.1 # via plotly -tensorboard==2.12.3 +tensorboard==2.16.2 # via tensorflow tensorboard-data-server==0.7.2 # via tensorboard -tensorflow==2.12.1 +tensorflow==2.16.1 # via -r requirements.in -tensorflow-estimator==2.12.0 - # via tensorflow -tensorflow-io-gcs-filesystem==0.35.0 +tensorflow-io-gcs-filesystem==0.37.0 # via tensorflow termcolor==2.4.0 # via tensorflow -threadpoolctl==3.2.0 +threadpoolctl==3.5.0 # via scikit-learn -typing-extensions==4.5.0 +typing-extensions==4.12.2 # via # alembic # azure-core + # optree # pydantic # sqlalchemy # tensorflow # typing-inspect typing-inspect==0.9.0 # via dataclasses-json -urllib3==2.1.0 +urllib3==2.2.2 # via # azureml-core - # databricks-cli # docker # requests -websocket-client==1.7.0 - # via docker -werkzeug==3.0.1 +werkzeug==3.0.3 # via # flask # tensorboard -wheel==0.42.0 - # via - # astunparse - # tensorboard -wrapt==1.14.1 +wheel==0.43.0 + # via astunparse +wrapt==1.16.0 # via # gordo-client # tensorflow xarray==2023.12.0 # via gordo-core 
-zipp==3.17.0 +zipp==3.19.2 # via importlib-metadata # The following packages are considered to be unsafe in a requirements file: diff --git a/requirements/mlflow_requirements.in b/requirements/mlflow_requirements.in index 21feaa941..8984c9033 100644 --- a/requirements/mlflow_requirements.in +++ b/requirements/mlflow_requirements.in @@ -1,2 +1,2 @@ -mlflow~=2.9 -azureml-core~=1.49 +mlflow~=2.12.2 +azureml-core~=1.56.0 diff --git a/requirements/requirements.in b/requirements/requirements.in index 15a627c68..d837eb11e 100644 --- a/requirements/requirements.in +++ b/requirements/requirements.in @@ -1,14 +1,11 @@ dictdiffer~=0.8 dataclasses-json~=0.3 -gunicorn~=20.0 h5py~=3.1 jinja2~=3.1 python-dateutil~=2.8 -tensorflow>=2.11,<2.13 +tensorflow~=2.16.0 Flask>=2.2.5,<3.0.0 simplejson~=3.17 -catboost~=1.2.2 +catboost~=1.2.5 prometheus_client~=0.7 -# Due to azureml-core 1.49.0 depends on packaging<22.0 -packaging>=21.0,<22.0 gordo-client~=6.2 From 4064a8e19f6a2baca3995da7955bdd38c058a2d1 Mon Sep 17 00:00:00 2001 From: RollerKnobster Date: Mon, 24 Jun 2024 14:22:21 +0300 Subject: [PATCH 02/52] Fix(requirements): bump test dependencies, mainly to eliminate third-party security issues * Bump `docker 6.1.3->7.1.0` * Bump `responses 0.23->0.25.3` * Bump `black 22.12.0->24.4.2` * Bump `notebook 6.5.6->7.2.1` --- requirements/test_requirements.in | 9 +- requirements/test_requirements.txt | 258 +++++++++++++++-------------- 2 files changed, 140 insertions(+), 127 deletions(-) diff --git a/requirements/test_requirements.in b/requirements/test_requirements.in index 077c481e2..ebdeb837e 100644 --- a/requirements/test_requirements.in +++ b/requirements/test_requirements.in @@ -1,5 +1,5 @@ -c full_requirements.txt -docker>=4.0,<7.0 +docker~=7.1.0 pytest~=7.2 pytest-xdist~=3.2 pytest-mock~=3.6 @@ -9,10 +9,9 @@ pytest-cov~=4.0 pytest-benchmark~=4.0 pytest-flakes~=4.0 mock~=5.0 -responses~=0.23 -# Due to packaging>22.0 in black 23.0, azureml-core~=1.49 requires packaging<22.0 
-black>=22.0,<23.0 -notebook~=6.4 +responses~=0.25.3 +black~=24.4.2 +notebook~=7.2.1 nbconvert~=7.4 types-simplejson types-python-dateutil diff --git a/requirements/test_requirements.txt b/requirements/test_requirements.txt index 74e614df9..3100f22cd 100644 --- a/requirements/test_requirements.txt +++ b/requirements/test_requirements.txt @@ -1,36 +1,43 @@ # -# This file is autogenerated by pip-compile with Python 3.11 +# This file is autogenerated by pip-compile with Python 3.10 # by the following command: # -# pip-compile --no-emit-index-url --output-file=test_requirements.txt test_requirements.in +# pip-compile --output-file=test_requirements.txt test_requirements.in # -anyio==4.2.0 - # via jupyter-server -argon2-cffi==23.1.0 +anyio==4.4.0 # via + # httpx # jupyter-server - # nbclassic - # notebook +appnope==0.1.4 + # via ipykernel +argon2-cffi==23.1.0 + # via jupyter-server argon2-cffi-bindings==21.2.0 # via argon2-cffi arrow==1.3.0 # via isoduration asttokens==2.4.1 # via stack-data +async-lru==2.0.4 + # via jupyterlab attrs==23.2.0 # via # jsonschema # pytest-mypy # referencing +babel==2.15.0 + # via jupyterlab-server beautifulsoup4==4.12.3 # via nbconvert -black==22.12.0 +black==24.4.2 # via -r test_requirements.in bleach==6.1.0 # via nbconvert -certifi==2023.11.17 +certifi==2024.6.2 # via # -c full_requirements.txt + # httpcore + # httpx # requests cffi==1.16.0 # via @@ -44,104 +51,117 @@ click==8.1.7 # via # -c full_requirements.txt # black -comm==0.2.1 +comm==0.2.2 # via ipykernel -coverage[toml]==7.4.0 +coverage[toml]==7.5.4 # via pytest-cov -debugpy==1.8.0 +debugpy==1.8.1 # via ipykernel decorator==5.1.1 # via ipython defusedxml==0.7.1 # via nbconvert -docker==6.1.3 +docker==7.1.0 # via # -c full_requirements.txt # -r test_requirements.in -entrypoints==0.4 +exceptiongroup==1.2.1 # via - # -c full_requirements.txt - # jupyter-client -execnet==2.0.2 + # anyio + # ipython + # pytest +execnet==2.1.1 # via pytest-xdist executing==2.0.1 # via stack-data 
-fastjsonschema==2.19.1 +fastjsonschema==2.20.0 # via nbformat -filelock==3.13.1 +filelock==3.15.4 # via pytest-mypy fqdn==1.5.1 # via jsonschema -idna==3.6 +h11==0.14.0 + # via httpcore +httpcore==1.0.5 + # via httpx +httpx==0.27.0 + # via jupyterlab +idna==3.7 # via # -c full_requirements.txt # anyio + # httpx # jsonschema # requests iniconfig==2.0.0 # via pytest -ipykernel==6.29.0 - # via - # nbclassic - # notebook -ipython==8.20.0 +ipykernel==6.29.4 + # via jupyterlab +ipython==8.25.0 # via ipykernel -ipython-genutils==0.2.0 - # via - # nbclassic - # notebook isoduration==20.11.0 # via jsonschema jedi==0.19.1 # via ipython -jinja2==3.1.3 +jinja2==3.1.4 # via # -c full_requirements.txt # jupyter-server - # nbclassic + # jupyterlab + # jupyterlab-server # nbconvert - # notebook -jsonpointer==2.4 +json5==0.9.25 + # via jupyterlab-server +jsonpointer==3.0.0 # via jsonschema -jsonschema[format-nongpl]==4.21.1 +jsonschema[format-nongpl]==4.22.0 # via # jupyter-events + # jupyterlab-server # nbformat jsonschema-specifications==2023.12.1 # via jsonschema -jupyter-client==7.4.9 +jupyter-client==8.6.2 # via # ipykernel # jupyter-server - # nbclassic # nbclient - # notebook -jupyter-core==5.7.1 +jupyter-core==5.7.2 # via # ipykernel # jupyter-client # jupyter-server - # nbclassic + # jupyterlab # nbclient # nbconvert # nbformat - # notebook -jupyter-events==0.9.0 +jupyter-events==0.10.0 # via jupyter-server -jupyter-server==2.12.5 +jupyter-lsp==2.2.5 + # via jupyterlab +jupyter-server==2.14.1 # via - # nbclassic + # jupyter-lsp + # jupyterlab + # jupyterlab-server + # notebook # notebook-shim -jupyter-server-terminals==0.5.2 +jupyter-server-terminals==0.5.3 # via jupyter-server +jupyterlab==4.2.2 + # via notebook jupyterlab-pygments==0.3.0 # via nbconvert -markupsafe==2.1.4 +jupyterlab-server==2.27.2 + # via + # jupyterlab + # notebook +markupsafe==2.1.5 # via # -c full_requirements.txt # jinja2 # nbconvert -matplotlib-inline==0.1.6 +matplotlib-inline==0.1.7 # via # 
ipykernel # ipython @@ -149,53 +169,47 @@ mistune==3.0.2 # via nbconvert mock==5.1.0 # via -r test_requirements.in -mypy==1.8.0 +mypy==1.10.0 # via pytest-mypy mypy-extensions==1.0.0 # via # -c full_requirements.txt # black # mypy -nbclassic==1.0.0 - # via notebook -nbclient==0.9.0 +nbclient==0.10.0 # via nbconvert -nbconvert==7.14.2 +nbconvert==7.16.4 # via # -r test_requirements.in # jupyter-server - # nbclassic - # notebook -nbformat==5.9.2 +nbformat==5.10.4 # via # jupyter-server - # nbclassic # nbclient # nbconvert - # notebook nest-asyncio==1.6.0 + # via ipykernel +notebook==7.2.1 + # via -r test_requirements.in +notebook-shim==0.2.4 # via - # ipykernel - # jupyter-client - # nbclassic + # jupyterlab # notebook -notebook==6.5.6 - # via -r test_requirements.in -notebook-shim==0.2.3 - # via nbclassic -overrides==7.6.0 +overrides==7.7.0 # via jupyter-server -packaging==21.3 +packaging==24.1 # via # -c full_requirements.txt - # docker + # black # ipykernel # jupyter-server + # jupyterlab + # jupyterlab-server # nbconvert # pytest pandocfilters==1.5.1 # via nbconvert -parso==0.8.3 +parso==0.8.4 # via jedi pathspec==0.12.1 # via @@ -203,21 +217,19 @@ pathspec==0.12.1 # black pexpect==4.9.0 # via ipython -platformdirs==4.1.0 +platformdirs==4.2.2 # via # black # jupyter-core -pluggy==1.3.0 +pluggy==1.5.0 # via pytest -prometheus-client==0.19.0 +prometheus-client==0.20.0 # via # -c full_requirements.txt # jupyter-server - # nbclassic - # notebook -prompt-toolkit==3.0.43 +prompt-toolkit==3.0.47 # via ipython -psutil==5.9.8 +psutil==6.0.0 # via ipykernel ptyprocess==0.7.0 # via @@ -227,21 +239,17 @@ pure-eval==0.2.2 # via stack-data py-cpuinfo==9.0.0 # via pytest-benchmark -pycparser==2.21 +pycparser==2.22 # via # -c full_requirements.txt # cffi pyflakes==3.2.0 # via pytest-flakes -pygments==2.17.2 +pygments==2.18.0 # via # -c full_requirements.txt # ipython # nbconvert -pyparsing==3.1.1 - # via - # -c full_requirements.txt - # packaging pytest==7.4.4 # via # -r 
test_requirements.in @@ -258,15 +266,15 @@ pytest-cov==4.1.0 # via -r test_requirements.in pytest-flakes==4.0.5 # via -r test_requirements.in -pytest-mock==3.12.0 +pytest-mock==3.14.0 # via -r test_requirements.in pytest-mypy==0.10.3 # via -r test_requirements.in -pytest-timeout==2.2.0 +pytest-timeout==2.3.1 # via -r test_requirements.in -pytest-xdist==3.5.0 +pytest-xdist==3.6.1 # via -r test_requirements.in -python-dateutil==2.8.2 +python-dateutil==2.9.0.post0 # via # -c full_requirements.txt # arrow @@ -278,24 +286,23 @@ pyyaml==6.0.1 # -c full_requirements.txt # jupyter-events # responses -pyzmq==24.0.1 +pyzmq==26.0.3 # via # ipykernel # jupyter-client # jupyter-server - # nbclassic - # notebook -referencing==0.32.1 +referencing==0.35.1 # via # jsonschema # jsonschema-specifications # jupyter-events -requests==2.31.0 +requests==2.32.3 # via # -c full_requirements.txt # docker + # jupyterlab-server # responses -responses==0.24.1 +responses==0.25.3 # via -r test_requirements.in rfc3339-validator==0.1.4 # via @@ -305,15 +312,12 @@ rfc3986-validator==0.1.1 # via # jsonschema # jupyter-events -rpds-py==0.17.1 +rpds-py==0.18.1 # via # jsonschema # referencing -send2trash==1.8.2 - # via - # jupyter-server - # nbclassic - # notebook +send2trash==1.8.3 + # via jupyter-server six==1.16.0 # via # -c full_requirements.txt @@ -321,29 +325,36 @@ six==1.16.0 # bleach # python-dateutil # rfc3339-validator -sniffio==1.3.0 - # via anyio +sniffio==1.3.1 + # via + # anyio + # httpx soupsieve==2.5 # via beautifulsoup4 stack-data==0.6.3 # via ipython -terminado==0.18.0 +terminado==0.18.1 # via # jupyter-server # jupyter-server-terminals - # nbclassic - # notebook -tinycss2==1.2.1 +tinycss2==1.3.0 # via nbconvert -tornado==6.4 +tomli==2.0.1 + # via + # black + # coverage + # jupyterlab + # mypy + # pytest +tornado==6.4.1 # via # ipykernel # jupyter-client # jupyter-server - # nbclassic + # jupyterlab # notebook # terminado -traitlets==5.14.1 +traitlets==5.14.3 # via # comm # ipykernel 
@@ -352,35 +363,38 @@ traitlets==5.14.1 # jupyter-core # jupyter-events # jupyter-server + # jupyterlab # matplotlib-inline - # nbclassic # nbclient # nbconvert # nbformat - # notebook -types-mock==5.1.0.20240106 +types-mock==5.1.0.20240425 # via -r test_requirements.in -types-python-dateutil==2.8.19.20240106 +types-python-dateutil==2.9.0.20240316 # via # -r test_requirements.in # arrow -types-pytz==2023.3.1.1 +types-pytz==2024.1.0.20240417 # via -r test_requirements.in -types-pyyaml==6.0.12.12 +types-pyyaml==6.0.12.20240311 # via -r test_requirements.in -types-requests==2.31.0.20240106 +types-requests==2.32.0.20240622 # via -r test_requirements.in -types-setuptools==69.0.0.20240115 +types-setuptools==70.0.0.20240524 # via -r test_requirements.in -types-simplejson==3.19.0.2 +types-simplejson==3.19.0.20240310 # via -r test_requirements.in -typing-extensions==4.5.0 +typing-extensions==4.12.2 # via # -c full_requirements.txt + # anyio + # async-lru + # black + # ipython # mypy uri-template==1.3.0 # via jsonschema -urllib3==2.1.0 +urllib3==2.2.2 # via # -c full_requirements.txt # docker @@ -389,14 +403,14 @@ urllib3==2.1.0 # types-requests wcwidth==0.2.13 # via prompt-toolkit -webcolors==1.13 +webcolors==24.6.0 # via jsonschema webencodings==0.5.1 # via # bleach # tinycss2 -websocket-client==1.7.0 - # via - # -c full_requirements.txt - # docker - # jupyter-server +websocket-client==1.8.0 + # via jupyter-server + +# The following packages are considered to be unsafe in a requirements file: +# setuptools From 695c592e7e6c6130563d74a69c58db562d4eb235 Mon Sep 17 00:00:00 2001 From: RollerKnobster Date: Mon, 24 Jun 2024 14:23:36 +0300 Subject: [PATCH 03/52] Fix(requirements): use scikeras to bring back keras wrappers * Add `scikeras~=0.13.0` dependency * Use `scikeras.wrappers.KerasRegressor` instead of removed `tensorflow.keras.wrappers.scikit_learn import KerasRegressor` --- gordo/machine/model/models.py | 2 +- requirements/full_requirements.txt | 7 ++++++- 
requirements/requirements.in | 1 + tests/gordo/machine/model/test_model.py | 3 +-- 4 files changed, 9 insertions(+), 4 deletions(-) diff --git a/gordo/machine/model/models.py b/gordo/machine/model/models.py index 236b808d7..6bd0aa01d 100644 --- a/gordo/machine/model/models.py +++ b/gordo/machine/model/models.py @@ -14,7 +14,7 @@ import tensorflow.keras.models from tensorflow.keras.models import load_model, save_model from tensorflow.keras.preprocessing.sequence import pad_sequences, TimeseriesGenerator -from tensorflow.keras.wrappers.scikit_learn import KerasRegressor as BaseWrapper +from scikeras.wrappers import KerasRegressor as BaseWrapper from tensorflow.keras.callbacks import History import numpy as np import pandas as pd diff --git a/requirements/full_requirements.txt b/requirements/full_requirements.txt index e6a174b8f..d9cc33b5a 100644 --- a/requirements/full_requirements.txt +++ b/requirements/full_requirements.txt @@ -191,7 +191,9 @@ joblib==1.4.2 jsonpickle==3.2.2 # via azureml-core keras==3.3.3 - # via tensorflow + # via + # scikeras + # tensorflow kiwisolver==1.4.5 # via matplotlib knack==0.11.0 @@ -382,10 +384,13 @@ requests-oauthlib==2.0.0 # via msrest rich==13.7.1 # via keras +scikeras==0.13.0 + # via -r requirements.in scikit-learn==1.5.0 # via # gordo-core # mlflow + # scikeras scipy==1.13.1 # via # catboost diff --git a/requirements/requirements.in b/requirements/requirements.in index d837eb11e..0bf3eeb6d 100644 --- a/requirements/requirements.in +++ b/requirements/requirements.in @@ -4,6 +4,7 @@ h5py~=3.1 jinja2~=3.1 python-dateutil~=2.8 tensorflow~=2.16.0 +scikeras~=0.13.0 Flask>=2.2.5,<3.0.0 simplejson~=3.17 catboost~=1.2.5 diff --git a/tests/gordo/machine/model/test_model.py b/tests/gordo/machine/model/test_model.py index 2720724ad..f068b35f1 100644 --- a/tests/gordo/machine/model/test_model.py +++ b/tests/gordo/machine/model/test_model.py @@ -11,8 +11,7 @@ from sklearn.exceptions import NotFittedError from sklearn.pipeline import Pipeline 
from sklearn.model_selection import cross_val_score, TimeSeriesSplit - -from tensorflow.keras.wrappers.scikit_learn import KerasRegressor as BaseWrapper +from scikeras.wrappers import KerasRegressor as BaseWrapper from tensorflow.keras.callbacks import EarlyStopping from tests.utils import get_model From 9a75758abd3230d84aefb96e4d6f32464cfa4444 Mon Sep 17 00:00:00 2001 From: RollerKnobster Date: Mon, 24 Jun 2024 14:28:37 +0300 Subject: [PATCH 04/52] Fix: apply formatting --- docs/_static/architecture_diagram.py | 9 +++-- docs/conf.py | 15 +++++--- gordo/machine/model/anomaly/diff.py | 36 +++++++++---------- .../model/factories/lstm_autoencoder.py | 1 - gordo/machine/model/models.py | 1 - gordo/machine/model/transformers/imputer.py | 16 +++++---- gordo/machine/model/utils.py | 10 +++--- gordo/serializer/from_definition.py | 6 ++-- gordo/serializer/into_definition.py | 8 +++-- gordo/server/utils.py | 8 +++-- gordo/util/version.py | 3 +- .../config_elements/normalized_config.py | 6 ++-- setup.py | 2 +- tests/gordo/client/test_client.py | 1 + tests/gordo/util/test_sensor_tag.py | 1 - 15 files changed, 70 insertions(+), 53 deletions(-) diff --git a/docs/_static/architecture_diagram.py b/docs/_static/architecture_diagram.py index 934fafd8f..d34a77249 100644 --- a/docs/_static/architecture_diagram.py +++ b/docs/_static/architecture_diagram.py @@ -9,9 +9,14 @@ from diagrams.k8s.storage import PV from diagrams.custom import Custom -directory=os.path.dirname(__file__) +directory = os.path.dirname(__file__) -with Diagram("Gordo flow", filename=os.path.join(directory, "architecture_diagram"), outformat="png", show=False) as diag: +with Diagram( + "Gordo flow", + filename=os.path.join(directory, "architecture_diagram"), + outformat="png", + show=False, +) as diag: with Cluster("K8s"): gordo = CRD("Gordo") api = API("") diff --git a/docs/conf.py b/docs/conf.py index abe2c3bf7..4385d67e1 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -26,7 +26,11 @@ author = "Equinor ASA" 
version = gordo.__version__ _parsed_version = parse_version(version) -commit = f"{version}" if type(_parsed_version) is GordoRelease and not _parsed_version.suffix else "HEAD" +commit = ( + f"{version}" + if type(_parsed_version) is GordoRelease and not _parsed_version.suffix + else "HEAD" +) # -- General configuration --------------------------------------------------- # https://www.sphinx-doc.org/en/master/usage/configuration.html#general-configuration @@ -44,7 +48,7 @@ "IPython.sphinxext.ipython_console_highlighting", "sphinx_copybutton", "sphinx_click", - "nbsphinx" + "nbsphinx", ] root_doc = "index" @@ -59,8 +63,11 @@ _ignore_linkcode_infos = [ # caused "OSError: could not find class definition" {"module": "gordo_core.utils", "fullname": "PredictionResult"}, - {'module': 'gordo.workflow.config_elements.schemas', 'fullname': 'Model.Config.extra'}, - {'module': 'gordo.reporters.postgres', 'fullname': 'Machine.DoesNotExist'} + { + "module": "gordo.workflow.config_elements.schemas", + "fullname": "Model.Config.extra", + }, + {"module": "gordo.reporters.postgres", "fullname": "Machine.DoesNotExist"}, ] diff --git a/gordo/machine/model/anomaly/diff.py b/gordo/machine/model/anomaly/diff.py index 051610d21..e3b1f6f5d 100644 --- a/gordo/machine/model/anomaly/diff.py +++ b/gordo/machine/model/anomaly/diff.py @@ -95,13 +95,13 @@ def get_metadata(self): if hasattr(self, "aggregate_threshold_"): metadata["aggregate-threshold"] = self.aggregate_threshold_ if hasattr(self, "feature_thresholds_per_fold_"): - metadata[ - "feature-thresholds-per-fold" - ] = self.feature_thresholds_per_fold_.to_dict() + metadata["feature-thresholds-per-fold"] = ( + self.feature_thresholds_per_fold_.to_dict() + ) if hasattr(self, "aggregate_thresholds_per_fold_"): - metadata[ - "aggregate-thresholds-per-fold" - ] = self.aggregate_thresholds_per_fold_ + metadata["aggregate-thresholds-per-fold"] = ( + self.aggregate_thresholds_per_fold_ + ) # Window threshold metadata if hasattr(self, "window"): 
metadata["window"] = self.window @@ -111,9 +111,9 @@ def get_metadata(self): hasattr(self, "smooth_feature_thresholds_") and self.smooth_aggregate_threshold_ is not None ): - metadata[ - "smooth-feature-thresholds" - ] = self.smooth_feature_thresholds_.tolist() + metadata["smooth-feature-thresholds"] = ( + self.smooth_feature_thresholds_.tolist() + ) if ( hasattr(self, "smooth_aggregate_threshold_") and self.smooth_aggregate_threshold_ is not None @@ -121,13 +121,13 @@ def get_metadata(self): metadata["smooth-aggregate-threshold"] = self.smooth_aggregate_threshold_ if hasattr(self, "smooth_feature_thresholds_per_fold_"): - metadata[ - "smooth-feature-thresholds-per-fold" - ] = self.smooth_feature_thresholds_per_fold_.to_dict() + metadata["smooth-feature-thresholds-per-fold"] = ( + self.smooth_feature_thresholds_per_fold_.to_dict() + ) if hasattr(self, "smooth_aggregate_thresholds_per_fold_"): - metadata[ - "smooth-aggregate-thresholds-per-fold" - ] = self.smooth_aggregate_thresholds_per_fold_ + metadata["smooth-aggregate-thresholds-per-fold"] = ( + self.smooth_aggregate_thresholds_per_fold_ + ) if isinstance(self.base_estimator, GordoBase): metadata.update(self.base_estimator.get_metadata()) @@ -241,9 +241,9 @@ def cross_validate( smooth_aggregate_threshold_fold = ( scaled_mse.rolling(self.window).min().max() ) - self.smooth_aggregate_thresholds_per_fold_[ - f"fold-{i}" - ] = smooth_aggregate_threshold_fold + self.smooth_aggregate_thresholds_per_fold_[f"fold-{i}"] = ( + smooth_aggregate_threshold_fold + ) smooth_tag_thresholds_fold = mae.rolling(self.window).min().max() smooth_tag_thresholds_fold.name = f"fold-{i}" diff --git a/gordo/machine/model/factories/lstm_autoencoder.py b/gordo/machine/model/factories/lstm_autoencoder.py index d2240ce20..a0e334fa6 100644 --- a/gordo/machine/model/factories/lstm_autoencoder.py +++ b/gordo/machine/model/factories/lstm_autoencoder.py @@ -188,7 +188,6 @@ def lstm_hourglass( compile_kwargs: Dict[str, Any] = dict(), **kwargs, ) -> 
tensorflow.keras.models.Sequential: - """ Builds an hourglass shaped neural network, with decreasing number of neurons diff --git a/gordo/machine/model/models.py b/gordo/machine/model/models.py index 6bd0aa01d..0506e6a78 100644 --- a/gordo/machine/model/models.py +++ b/gordo/machine/model/models.py @@ -541,7 +541,6 @@ def _validate_and_fix_size_of_X(self, X): def fit( # type: ignore self, X: np.ndarray, y: np.ndarray, **kwargs ) -> "KerasLSTMForecast": - """ This fits a one step forecast LSTM architecture. diff --git a/gordo/machine/model/transformers/imputer.py b/gordo/machine/model/transformers/imputer.py index 5c3d6c57b..43a99f176 100644 --- a/gordo/machine/model/transformers/imputer.py +++ b/gordo/machine/model/transformers/imputer.py @@ -71,14 +71,18 @@ def fit(self, X: Union[pd.DataFrame, np.ndarray], y=None): # Calculate a 1d arrays of fill values for each feature self._posinf_fill_values = _posinf_fill_values.apply( - lambda val: val + self.delta - if max_allowable_value - self.delta > val - else max_allowable_value + lambda val: ( + val + self.delta + if max_allowable_value - self.delta > val + else max_allowable_value + ) ) self._neginf_fill_values = _neginf_fill_values.apply( - lambda val: val - self.delta - if min_allowable_value + self.delta < val - else min_allowable_value + lambda val: ( + val - self.delta + if min_allowable_value + self.delta < val + else min_allowable_value + ) ) return self diff --git a/gordo/machine/model/utils.py b/gordo/machine/model/utils.py index 3e17b1865..801db59f5 100644 --- a/gordo/machine/model/utils.py +++ b/gordo/machine/model/utils.py @@ -111,9 +111,11 @@ def make_base_dataframe( # Calculate the end times if possible, or also all 'None's end_series = start_series.map( - lambda start: (start + frequency).isoformat() - if isinstance(start, datetime) and frequency is not None - else None + lambda start: ( + (start + frequency).isoformat() + if isinstance(start, datetime) and frequency is not None + else None + ) ) # 
Convert to isoformatted string for JSON serialization. @@ -134,7 +136,7 @@ def make_base_dataframe( # the multiindex column dataframe, and naming their second level labels as needed. name: str values: np.ndarray - for (name, values) in filter(lambda nv: nv[1] is not None, names_n_values): + for name, values in filter(lambda nv: nv[1] is not None, names_n_values): _tags = tags if name == "model-input" else target_tag_list diff --git a/gordo/serializer/from_definition.py b/gordo/serializer/from_definition.py index 4412e7947..fa344e253 100644 --- a/gordo/serializer/from_definition.py +++ b/gordo/serializer/from_definition.py @@ -174,9 +174,9 @@ def _build_step( import_str = list(step.keys())[0] try: - StepClass: Union[ - None, FeatureUnion, Pipeline, BaseEstimator - ] = import_location(import_str) + StepClass: Union[None, FeatureUnion, Pipeline, BaseEstimator] = ( + import_location(import_str) + ) except (ImportError, ValueError): StepClass = None diff --git a/gordo/serializer/into_definition.py b/gordo/serializer/into_definition.py index ae13588b9..a2eec78c7 100644 --- a/gordo/serializer/into_definition.py +++ b/gordo/serializer/into_definition.py @@ -172,9 +172,11 @@ def load_definition_from_params(params: dict, tuples_to_list: bool = True) -> di # TODO: Make this more robust, probably via another function to parse the iterable recursively # TODO: b/c it _could_, in theory, be a dict of {str: BaseEstimator} or similar. 
definition[param] = [ - _decompose_node(leaf[1], tuples_to_list=tuples_to_list) - if isinstance(leaf, tuple) - else leaf + ( + _decompose_node(leaf[1], tuples_to_list=tuples_to_list) + if isinstance(leaf, tuple) + else leaf + ) for leaf in param_val ] diff --git a/gordo/server/utils.py b/gordo/server/utils.py index 9cf1a1942..f54817378 100644 --- a/gordo/server/utils.py +++ b/gordo/server/utils.py @@ -131,9 +131,11 @@ def dataframe_to_dict(df: pd.DataFrame) -> dict: data.index = data.index.astype(str) if isinstance(df.columns, pd.MultiIndex): return { - col: data[col].to_dict() - if isinstance(data[col], pd.DataFrame) - else pd.DataFrame(data[col]).to_dict() + col: ( + data[col].to_dict() + if isinstance(data[col], pd.DataFrame) + else pd.DataFrame(data[col]).to_dict() + ) for col in data.columns.get_level_values(0) } else: diff --git a/gordo/util/version.py b/gordo/util/version.py index a43d3a5dd..60b11157d 100644 --- a/gordo/util/version.py +++ b/gordo/util/version.py @@ -8,8 +8,7 @@ class Version(metaclass=ABCMeta): @abstractmethod - def get_version(self): - ... + def get_version(self): ... 
class Special(Enum): diff --git a/gordo/workflow/config_elements/normalized_config.py b/gordo/workflow/config_elements/normalized_config.py index 628071ba3..42377d378 100644 --- a/gordo/workflow/config_elements/normalized_config.py +++ b/gordo/workflow/config_elements/normalized_config.py @@ -119,10 +119,8 @@ def __init__( if gordo_version is None: gordo_version = __version__ default_globals = self.get_default_globals(gordo_version) - default_globals["runtime"]["influx"][ # type: ignore - "resources" - ] = _calculate_influx_resources( # type: ignore - len(config["machines"]) + default_globals["runtime"]["influx"]["resources"] = ( # type: ignore + _calculate_influx_resources(len(config["machines"])) # type: ignore ) passed_globals = load_globals_config( diff --git a/setup.py b/setup.py index d87a86d94..0490843b4 100644 --- a/setup.py +++ b/setup.py @@ -49,7 +49,7 @@ def requirements(fp: str): ], description="Train and build models for Argo / Kubernetes", long_description=long_description, - long_description_content_type='text/markdown', + long_description_content_type="text/markdown", entry_points={"console_scripts": ["gordo=gordo.cli:gordo"]}, install_requires=install_requires, license="AGPLv3", diff --git a/tests/gordo/client/test_client.py b/tests/gordo/client/test_client.py index 04097cbe7..e3b306363 100644 --- a/tests/gordo/client/test_client.py +++ b/tests/gordo/client/test_client.py @@ -1,4 +1,5 @@ """Tests for gordo-client.""" + # TODO: Move those tests to gordo-client project. 
import json diff --git a/tests/gordo/util/test_sensor_tag.py b/tests/gordo/util/test_sensor_tag.py index 139597f9c..8b1378917 100644 --- a/tests/gordo/util/test_sensor_tag.py +++ b/tests/gordo/util/test_sensor_tag.py @@ -1,2 +1 @@ - From 01eac86295091bc2d6eb73062da7cb22688d609e Mon Sep 17 00:00:00 2001 From: RollerKnobster Date: Mon, 24 Jun 2024 14:47:39 +0300 Subject: [PATCH 05/52] Fix: add `super().__init__(**kwargs)` call to `KerasBaseEstimator` --- gordo/machine/model/models.py | 1 + 1 file changed, 1 insertion(+) diff --git a/gordo/machine/model/models.py b/gordo/machine/model/models.py index 0506e6a78..e8765bc1a 100644 --- a/gordo/machine/model/models.py +++ b/gordo/machine/model/models.py @@ -83,6 +83,7 @@ def __init__( self.kind = self.load_kind(kind) self.kwargs: Dict[str, Any] = kwargs + super().__init__(**kwargs) @staticmethod def parse_module_path(module_path) -> Tuple[Optional[str], str]: From d6faadcf40b05be9ec5cb7988a5528a3828db6bb Mon Sep 17 00:00:00 2001 From: RollerKnobster Date: Mon, 24 Jun 2024 14:51:21 +0300 Subject: [PATCH 06/52] Fix: rename `build_fn` to `model` --- gordo/machine/model/models.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gordo/machine/model/models.py b/gordo/machine/model/models.py index e8765bc1a..40898b426 100644 --- a/gordo/machine/model/models.py +++ b/gordo/machine/model/models.py @@ -302,7 +302,7 @@ def get_params(self, **params): Parameters used in this estimator """ params = super().get_params(**params) - params.pop("build_fn", None) + params.pop("model", None) params.update({"kind": self.kind}) params.update(self.kwargs) return params From d47855e37a179f9367b3c83af19062e9a4ae267c Mon Sep 17 00:00:00 2001 From: RollerKnobster Date: Mon, 24 Jun 2024 15:04:26 +0300 Subject: [PATCH 07/52] Fix: move `super().__init__(**kwargs)` to start of `KerasBaseEstimator.__init__` --- gordo/machine/model/models.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gordo/machine/model/models.py 
b/gordo/machine/model/models.py index 40898b426..c7131273e 100644 --- a/gordo/machine/model/models.py +++ b/gordo/machine/model/models.py @@ -78,12 +78,12 @@ def __init__( building function and/or any additional args to be passed to Keras' fit() method """ + super().__init__(**kwargs) self.build_fn = None self.history = None self.kind = self.load_kind(kind) self.kwargs: Dict[str, Any] = kwargs - super().__init__(**kwargs) @staticmethod def parse_module_path(module_path) -> Tuple[Optional[str], str]: From 77bc40c023531e3a14bfd4acca79951841f7648f Mon Sep 17 00:00:00 2001 From: RollerKnobster Date: Mon, 24 Jun 2024 15:19:08 +0300 Subject: [PATCH 08/52] Fix: i'm done with comments for now --- gordo/machine/model/models.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/gordo/machine/model/models.py b/gordo/machine/model/models.py index c7131273e..f3d7619b6 100644 --- a/gordo/machine/model/models.py +++ b/gordo/machine/model/models.py @@ -15,7 +15,6 @@ from tensorflow.keras.models import load_model, save_model from tensorflow.keras.preprocessing.sequence import pad_sequences, TimeseriesGenerator from scikeras.wrappers import KerasRegressor as BaseWrapper -from tensorflow.keras.callbacks import History import numpy as np import pandas as pd import xarray as xr @@ -78,7 +77,6 @@ def __init__( building function and/or any additional args to be passed to Keras' fit() method """ - super().__init__(**kwargs) self.build_fn = None self.history = None @@ -271,8 +269,8 @@ def fit( y = y.values kwargs.setdefault("verbose", 0) history = super().fit(X, y, sample_weight=None, **kwargs) - if isinstance(history, History): - self.history = history + if isinstance(history, BaseWrapper): + self.history = history.history_ return self def predict(self, X: np.ndarray, **kwargs) -> np.ndarray: From bede8723b560234f2499e1f5c4fdc230c33d2c26 Mon Sep 17 00:00:00 2001 From: RollerKnobster Date: Mon, 24 Jun 2024 15:30:02 +0300 Subject: [PATCH 09/52] Fix: remove redundant 
`BaseEstimator` inheritance in `KerasBaseEstimator` --- gordo/machine/model/models.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/gordo/machine/model/models.py b/gordo/machine/model/models.py index f3d7619b6..caa985b8e 100644 --- a/gordo/machine/model/models.py +++ b/gordo/machine/model/models.py @@ -19,7 +19,7 @@ import pandas as pd import xarray as xr -from sklearn.base import TransformerMixin, BaseEstimator +from sklearn.base import TransformerMixin from sklearn.metrics import explained_variance_score from sklearn.exceptions import NotFittedError @@ -34,7 +34,7 @@ logger = logging.getLogger(__name__) -class KerasBaseEstimator(BaseWrapper, GordoBase, BaseEstimator): +class KerasBaseEstimator(BaseWrapper, GordoBase): supported_fit_args = [ "batch_size", "epochs", From 06de56f10ea466fce9e860c4909d1899189967d7 Mon Sep 17 00:00:00 2001 From: RollerKnobster Date: Mon, 24 Jun 2024 15:31:02 +0300 Subject: [PATCH 10/52] Fix: remove redundant `BaseWrapper` alias for `KerasRegressor` --- gordo/machine/model/models.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/gordo/machine/model/models.py b/gordo/machine/model/models.py index caa985b8e..d70a7edfd 100644 --- a/gordo/machine/model/models.py +++ b/gordo/machine/model/models.py @@ -14,7 +14,7 @@ import tensorflow.keras.models from tensorflow.keras.models import load_model, save_model from tensorflow.keras.preprocessing.sequence import pad_sequences, TimeseriesGenerator -from scikeras.wrappers import KerasRegressor as BaseWrapper +from scikeras.wrappers import KerasRegressor import numpy as np import pandas as pd import xarray as xr @@ -34,7 +34,7 @@ logger = logging.getLogger(__name__) -class KerasBaseEstimator(BaseWrapper, GordoBase): +class KerasBaseEstimator(KerasRegressor, GordoBase): supported_fit_args = [ "batch_size", "epochs", @@ -269,7 +269,7 @@ def fit( y = y.values kwargs.setdefault("verbose", 0) history = super().fit(X, y, sample_weight=None, **kwargs) - if 
isinstance(history, BaseWrapper): + if isinstance(history, KerasRegressor): self.history = history.history_ return self From 5eee6999dd0161971628509337da2f1c7b667ab8 Mon Sep 17 00:00:00 2001 From: RollerKnobster Date: Mon, 24 Jun 2024 15:36:40 +0300 Subject: [PATCH 11/52] Fix: add super call to KerasBaseEstimator --- gordo/machine/model/models.py | 1 + 1 file changed, 1 insertion(+) diff --git a/gordo/machine/model/models.py b/gordo/machine/model/models.py index d70a7edfd..eb02b4b55 100644 --- a/gordo/machine/model/models.py +++ b/gordo/machine/model/models.py @@ -77,6 +77,7 @@ def __init__( building function and/or any additional args to be passed to Keras' fit() method """ + super().__init__(**kwargs) self.build_fn = None self.history = None From 3b2bca3920119a2c42141129da8c3868cfcd6287 Mon Sep 17 00:00:00 2001 From: RollerKnobster Date: Mon, 24 Jun 2024 16:02:07 +0300 Subject: [PATCH 12/52] Fix: no comments --- gordo/machine/model/models.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gordo/machine/model/models.py b/gordo/machine/model/models.py index eb02b4b55..7f3927175 100644 --- a/gordo/machine/model/models.py +++ b/gordo/machine/model/models.py @@ -301,7 +301,7 @@ def get_params(self, **params): Parameters used in this estimator """ params = super().get_params(**params) - params.pop("model", None) + params.pop("build_fn", None) params.update({"kind": self.kind}) params.update(self.kwargs) return params From 78fde460fb66ef4719107f1d7a793978ee20a54a Mon Sep 17 00:00:00 2001 From: RollerKnobster Date: Mon, 24 Jun 2024 16:33:25 +0300 Subject: [PATCH 13/52] Fix: refactor `__call__` method of models to `_prepare_model` --- gordo/machine/model/models.py | 16 ++++++++-------- gordo/machine/model/register.py | 12 ++++++------ tests/gordo/machine/model/test_register.py | 6 +++--- 3 files changed, 17 insertions(+), 17 deletions(-) diff --git a/gordo/machine/model/models.py b/gordo/machine/model/models.py index 7f3927175..56dbe5bfa 100644 --- 
a/gordo/machine/model/models.py +++ b/gordo/machine/model/models.py @@ -77,8 +77,8 @@ def __init__( building function and/or any additional args to be passed to Keras' fit() method """ - super().__init__(**kwargs) - self.build_fn = None + model = self._prepare_model() + super().__init__(**kwargs, model=model) self.history = None self.kind = self.load_kind(kind) @@ -301,16 +301,16 @@ def get_params(self, **params): Parameters used in this estimator """ params = super().get_params(**params) - params.pop("build_fn", None) + params.pop("model", None) params.update({"kind": self.kind}) params.update(self.kwargs) return params - def __call__(self): + def _prepare_model(self): module_name, class_name = self.parse_module_path(self.kind) if module_name is None: factories = register_model_builder.factories[self.__class__.__name__] - build_fn = factories[self.kind] + model = factories[self.kind] else: module = importlib.import_module(module_name) if not hasattr(module, class_name): @@ -318,8 +318,8 @@ def __call__(self): "kind: %s, unable to find class %s in module '%s'" % (self.kind, class_name, module_name) ) - build_fn = getattr(module, class_name) - return build_fn(**self.sk_params) + model = getattr(module, class_name) + return model(**self.sk_params) def get_metadata(self): """ @@ -426,7 +426,7 @@ def load_kind(self, kind): def __repr__(self): return f"{self.__class__.__name__}(kind: {pformat(self.kind)})" - def __call__(self): + def _prepare_model(self): """Build Keras model from specification""" if not all(k in self.kind for k in self._expected_keys): raise ValueError( diff --git a/gordo/machine/model/register.py b/gordo/machine/model/register.py index 55fb94c4b..dce61e21e 100644 --- a/gordo/machine/model/register.py +++ b/gordo/machine/model/register.py @@ -48,22 +48,22 @@ def special_keras_model_builder(n_features, ...): def __init__(self, type: str): self.type = type - def __call__(self, build_fn: Callable[..., keras.models.Model]): - self._register(self.type, 
build_fn) - return build_fn + def __call__(self, model: Callable[..., keras.models.Model]): + self._register(self.type, model) + return model @classmethod - def _register(cls, type: str, build_fn: Callable[[int, Any], GordoBase]): + def _register(cls, type: str, model: Callable[[int, Any], GordoBase]): """ Registers a given function as an available factory under this type. """ - cls._validate_func(build_fn) + cls._validate_func(model) # Add function to available factories under this type if type not in cls.factories: cls.factories[type] = dict() - cls.factories[type][build_fn.__name__] = build_fn + cls.factories[type][model.__name__] = model @staticmethod def _validate_func(func): diff --git a/tests/gordo/machine/model/test_register.py b/tests/gordo/machine/model/test_register.py index 47aa0b1ce..97a1369e1 100644 --- a/tests/gordo/machine/model/test_register.py +++ b/tests/gordo/machine/model/test_register.py @@ -15,16 +15,16 @@ def test_fail_no_required_params(self): with self.assertRaises(ValueError): @register_model_builder(type="KerasAutoEncoder") - def build_fn(): + def model(): pass # Pass with required param(s) @register_model_builder(type="KerasAutoEncoder") # pragma: no flakes - def build_fn(n_features): + def model(n_features): pass # Call to ensure that register didn't 'eat' the function - build_fn(1) + model(1) def test_hold_multiple_funcs(self): """ From 31a59a8612b2078f9780f2f85c1bf5a8894ce966 Mon Sep 17 00:00:00 2001 From: RollerKnobster Date: Mon, 24 Jun 2024 16:38:07 +0300 Subject: [PATCH 14/52] Fix: move assignment of `kind` above the `_prepare_model` call --- gordo/machine/model/models.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/gordo/machine/model/models.py b/gordo/machine/model/models.py index 56dbe5bfa..682cd6c0e 100644 --- a/gordo/machine/model/models.py +++ b/gordo/machine/model/models.py @@ -77,12 +77,12 @@ def __init__( building function and/or any additional args to be passed to Keras' fit() method """ - 
model = self._prepare_model() - super().__init__(**kwargs, model=model) - self.history = None - self.kind = self.load_kind(kind) self.kwargs: Dict[str, Any] = kwargs + self.history = None + + model = self._prepare_model() + super().__init__(**kwargs, model=model) @staticmethod def parse_module_path(module_path) -> Tuple[Optional[str], str]: From eaf24b21e35045040e19ed28954c62cd546820bf Mon Sep 17 00:00:00 2001 From: RollerKnobster Date: Mon, 24 Jun 2024 16:51:30 +0300 Subject: [PATCH 15/52] Fix: prepare model right before calling fit in keras models --- gordo/machine/model/models.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/gordo/machine/model/models.py b/gordo/machine/model/models.py index 682cd6c0e..4fc76784e 100644 --- a/gordo/machine/model/models.py +++ b/gordo/machine/model/models.py @@ -81,8 +81,7 @@ def __init__( self.kwargs: Dict[str, Any] = kwargs self.history = None - model = self._prepare_model() - super().__init__(**kwargs, model=model) + super().__init__(**kwargs) @staticmethod def parse_module_path(module_path) -> Tuple[Optional[str], str]: @@ -269,6 +268,8 @@ def fit( if isinstance(y, (pd.DataFrame, xr.DataArray)): y = y.values kwargs.setdefault("verbose", 0) + + self._prepare_model() history = super().fit(X, y, sample_weight=None, **kwargs) if isinstance(history, KerasRegressor): self.history = history.history_ @@ -319,7 +320,7 @@ def _prepare_model(self): % (self.kind, class_name, module_name) ) model = getattr(module, class_name) - return model(**self.sk_params) + self.model = model(**self.sk_params) def get_metadata(self): """ @@ -578,6 +579,7 @@ def fit( # type: ignore primer_x, primer_y = tsg[0] + self._prepare_model() super().fit(X=primer_x, y=primer_y, epochs=1, verbose=0) tsg = create_keras_timeseriesgenerator( From 757d207fc221b3bc29cf9270d4a6296b5419ba2e Mon Sep 17 00:00:00 2001 From: RollerKnobster Date: Mon, 24 Jun 2024 23:11:11 +0300 Subject: [PATCH 16/52] Fix: rename `self.history` to `self._history` in 
`KerasBaseEstimator` --- gordo/machine/model/models.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/gordo/machine/model/models.py b/gordo/machine/model/models.py index 4fc76784e..b0e5050fb 100644 --- a/gordo/machine/model/models.py +++ b/gordo/machine/model/models.py @@ -79,7 +79,7 @@ def __init__( """ self.kind = self.load_kind(kind) self.kwargs: Dict[str, Any] = kwargs - self.history = None + self._history = None super().__init__(**kwargs) @@ -186,9 +186,9 @@ def __getstate__(self): from tensorflow.python.keras.callbacks import History history = History() - history.history = self.history.history - history.params = self.history.params - history.epoch = self.history.epoch + history.history = self._history.history + history.params = self._history.params + history.epoch = self._history.epoch state["history"] = history return state @@ -269,10 +269,11 @@ def fit( y = y.values kwargs.setdefault("verbose", 0) - self._prepare_model() + if self.model is None: + self._prepare_model() history = super().fit(X, y, sample_weight=None, **kwargs) if isinstance(history, KerasRegressor): - self.history = history.history_ + self._history = history.history_ return self def predict(self, X: np.ndarray, **kwargs) -> np.ndarray: @@ -579,7 +580,6 @@ def fit( # type: ignore primer_x, primer_y = tsg[0] - self._prepare_model() super().fit(X=primer_x, y=primer_y, epochs=1, verbose=0) tsg = create_keras_timeseriesgenerator( From b64dc088cb3a8ef144dfc7c15657673578abcbb4 Mon Sep 17 00:00:00 2001 From: RollerKnobster Date: Mon, 24 Jun 2024 23:16:29 +0300 Subject: [PATCH 17/52] Fix: rename `self.kind` and `self.kwargs` to `self._kind` and `self._kwargs` in `KerasBaseEstimator` --- gordo/machine/model/models.py | 40 +++++++++++++++++------------------ 1 file changed, 20 insertions(+), 20 deletions(-) diff --git a/gordo/machine/model/models.py b/gordo/machine/model/models.py index b0e5050fb..a36646c15 100644 --- a/gordo/machine/model/models.py +++ 
b/gordo/machine/model/models.py @@ -77,8 +77,8 @@ def __init__( building function and/or any additional args to be passed to Keras' fit() method """ - self.kind = self.load_kind(kind) - self.kwargs: Dict[str, Any] = kwargs + self._kind = self.load_kind(kind) + self._kwargs: Dict[str, Any] = kwargs self._history = None super().__init__(**kwargs) @@ -156,21 +156,21 @@ def into_definition(self) -> dict: ------- """ - definition = copy(self.kwargs) - definition["kind"] = self.kind + definition = copy(self._kwargs) + definition["kind"] = self._kind return definition @property def sk_params(self): """ Parameters used for scikit learn kwargs""" - fit_args = self.extract_supported_fit_args(self.kwargs) + fit_args = self.extract_supported_fit_args(self._kwargs) if fit_args: - kwargs = deepcopy(self.kwargs) + kwargs = deepcopy(self._kwargs) kwargs.update(serializer.load_params_from_definition(fit_args)) return kwargs else: - return self.kwargs + return self._kwargs def __getstate__(self): @@ -256,7 +256,7 @@ def fit( y = y.reshape(-1, 1) logger.debug(f"Fitting to data of length: {len(X)}") - self.kwargs.update( + self._kwargs.update( { "n_features": self.get_n_features(X), "n_features_out": self.get_n_features_out(y), @@ -304,21 +304,21 @@ def get_params(self, **params): """ params = super().get_params(**params) params.pop("model", None) - params.update({"kind": self.kind}) - params.update(self.kwargs) + params.update({"kind": self._kind}) + params.update(self._kwargs) return params def _prepare_model(self): - module_name, class_name = self.parse_module_path(self.kind) + module_name, class_name = self.parse_module_path(self._kind) if module_name is None: factories = register_model_builder.factories[self.__class__.__name__] - model = factories[self.kind] + model = factories[self._kind] else: module = importlib.import_module(module_name) if not hasattr(module, class_name): raise ValueError( "kind: %s, unable to find class %s in module '%s'" - % (self.kind, class_name, 
module_name) + % (self._kind, class_name, module_name) ) model = getattr(module, class_name) self.model = model(**self.sk_params) @@ -426,20 +426,20 @@ def load_kind(self, kind): return kind def __repr__(self): - return f"{self.__class__.__name__}(kind: {pformat(self.kind)})" + return f"{self.__class__.__name__}(kind: {pformat(self._kind)})" def _prepare_model(self): """Build Keras model from specification""" - if not all(k in self.kind for k in self._expected_keys): + if not all(k in self._kind for k in self._expected_keys): raise ValueError( - f"Expected spec to have keys: {self._expected_keys}, but found {self.kind.keys()}" + f"Expected spec to have keys: {self._expected_keys}, but found {self._kind.keys()}" ) - logger.debug(f"Building model from spec: {self.kind}") + logger.debug(f"Building model from spec: {self._kind}") - model = serializer.from_definition(self.kind["spec"]) + model = serializer.from_definition(self._kind["spec"]) # Load any compile kwargs as well, such as compile.optimizer which may map to class obj - kwargs = serializer.from_definition(self.kind["compile"]) + kwargs = serializer.from_definition(self._kind["compile"]) model.compile(**kwargs) return model @@ -592,7 +592,7 @@ def fit( # type: ignore gen_kwargs = { k: v - for k, v in {**self.kwargs, **kwargs}.items() + for k, v in {**self._kwargs, **kwargs}.items() if k in self.fit_generator_params } From fe3530b775ba27159fe831c7066b54f542d96477 Mon Sep 17 00:00:00 2001 From: RollerKnobster Date: Mon, 24 Jun 2024 23:28:35 +0300 Subject: [PATCH 18/52] Fix: refactor `getattr(keras.optimizers, optimizer)` to `keras.optimizers.get(optimizer)` in autoencoders --- gordo/machine/model/factories/feedforward_autoencoder.py | 2 +- gordo/machine/model/factories/lstm_autoencoder.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/gordo/machine/model/factories/feedforward_autoencoder.py b/gordo/machine/model/factories/feedforward_autoencoder.py index 1e569dac5..1234fc9f5 100644 --- 
a/gordo/machine/model/factories/feedforward_autoencoder.py +++ b/gordo/machine/model/factories/feedforward_autoencoder.py @@ -88,7 +88,7 @@ class (e.x. Adam(lr=0.01,beta_1=0.9, beta_2=0.999)). If no arguments are # Instantiate optimizer with kwargs if isinstance(optimizer, str): - Optim = getattr(keras.optimizers, optimizer) + Optim = keras.optimizers.get(optimizer) optimizer = Optim(**optimizer_kwargs) # Final output layer diff --git a/gordo/machine/model/factories/lstm_autoencoder.py b/gordo/machine/model/factories/lstm_autoencoder.py index a0e334fa6..4864321a6 100644 --- a/gordo/machine/model/factories/lstm_autoencoder.py +++ b/gordo/machine/model/factories/lstm_autoencoder.py @@ -90,7 +90,7 @@ class (e.x. Adam(lr=0.01,beta_1=0.9, beta_2=0.999)). If no arguments are # output layer if isinstance(optimizer, str): - Optim = getattr(keras.optimizers, optimizer) + Optim = keras.optimizers.get(optimizer) optimizer = Optim(**optimizer_kwargs) model.add(Dense(units=n_features_out, activation=out_func)) From 9200d0706a7ae6a5f4515bed621fb169585d617a Mon Sep 17 00:00:00 2001 From: RollerKnobster Date: Tue, 25 Jun 2024 00:11:21 +0300 Subject: [PATCH 19/52] Fix: refactor `getattr(keras.optimizers, optimizer)` to `keras.optimizers.get(optimizer)` in autoencoders --- gordo/machine/model/factories/feedforward_autoencoder.py | 4 ++-- gordo/machine/model/factories/lstm_autoencoder.py | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/gordo/machine/model/factories/feedforward_autoencoder.py b/gordo/machine/model/factories/feedforward_autoencoder.py index 1234fc9f5..312d8b69a 100644 --- a/gordo/machine/model/factories/feedforward_autoencoder.py +++ b/gordo/machine/model/factories/feedforward_autoencoder.py @@ -5,7 +5,7 @@ from tensorflow.keras.optimizers import Optimizer from tensorflow.keras import regularizers from tensorflow.keras.layers import Dense -from tensorflow import keras +from tensorflow.python import keras from tensorflow.keras.models import 
Sequential as KerasSequential from gordo.machine.model.register import register_model_builder @@ -88,7 +88,7 @@ class (e.x. Adam(lr=0.01,beta_1=0.9, beta_2=0.999)). If no arguments are # Instantiate optimizer with kwargs if isinstance(optimizer, str): - Optim = keras.optimizers.get(optimizer) + Optim = getattr(keras.optimizer_v1, optimizer) optimizer = Optim(**optimizer_kwargs) # Final output layer diff --git a/gordo/machine/model/factories/lstm_autoencoder.py b/gordo/machine/model/factories/lstm_autoencoder.py index 4864321a6..a30cedd49 100644 --- a/gordo/machine/model/factories/lstm_autoencoder.py +++ b/gordo/machine/model/factories/lstm_autoencoder.py @@ -3,10 +3,10 @@ from typing import Tuple, Union, Dict, Any import tensorflow -from tensorflow import keras from tensorflow.keras.optimizers import Optimizer from tensorflow.keras.layers import Dense, LSTM from tensorflow.keras.models import Sequential as KerasSequential +from tensorflow.python import keras from gordo.machine.model.register import register_model_builder from gordo.machine.model.factories.utils import hourglass_calc_dims, check_dim_func_len @@ -90,7 +90,7 @@ class (e.x. Adam(lr=0.01,beta_1=0.9, beta_2=0.999)). 
If no arguments are # output layer if isinstance(optimizer, str): - Optim = keras.optimizers.get(optimizer) + Optim = getattr(keras.optimizer_v1, optimizer) optimizer = Optim(**optimizer_kwargs) model.add(Dense(units=n_features_out, activation=out_func)) From 3f2a31362c19a314cf6b042f1e41cf0b8784097b Mon Sep 17 00:00:00 2001 From: RollerKnobster Date: Tue, 25 Jun 2024 00:32:26 +0300 Subject: [PATCH 20/52] Fix: refactor `getattr(keras.optimizers, optimizer)` to `keras.optimizers.get(optimizer)` in autoencoders --- gordo/machine/model/factories/feedforward_autoencoder.py | 5 ++--- gordo/machine/model/factories/lstm_autoencoder.py | 5 ++--- 2 files changed, 4 insertions(+), 6 deletions(-) diff --git a/gordo/machine/model/factories/feedforward_autoencoder.py b/gordo/machine/model/factories/feedforward_autoencoder.py index 312d8b69a..0c0ff44c7 100644 --- a/gordo/machine/model/factories/feedforward_autoencoder.py +++ b/gordo/machine/model/factories/feedforward_autoencoder.py @@ -5,7 +5,7 @@ from tensorflow.keras.optimizers import Optimizer from tensorflow.keras import regularizers from tensorflow.keras.layers import Dense -from tensorflow.python import keras +from tensorflow import keras from tensorflow.keras.models import Sequential as KerasSequential from gordo.machine.model.register import register_model_builder @@ -88,8 +88,7 @@ class (e.x. Adam(lr=0.01,beta_1=0.9, beta_2=0.999)). 
If no arguments are # Instantiate optimizer with kwargs if isinstance(optimizer, str): - Optim = getattr(keras.optimizer_v1, optimizer) - optimizer = Optim(**optimizer_kwargs) + optimizer = keras.optimizers.get({"class_name": optimizer, "config": optimizer_kwargs}) # Final output layer model.add(Dense(n_features_out, activation=out_func)) diff --git a/gordo/machine/model/factories/lstm_autoencoder.py b/gordo/machine/model/factories/lstm_autoencoder.py index a30cedd49..02eb1eec1 100644 --- a/gordo/machine/model/factories/lstm_autoencoder.py +++ b/gordo/machine/model/factories/lstm_autoencoder.py @@ -6,7 +6,7 @@ from tensorflow.keras.optimizers import Optimizer from tensorflow.keras.layers import Dense, LSTM from tensorflow.keras.models import Sequential as KerasSequential -from tensorflow.python import keras +from tensorflow import keras from gordo.machine.model.register import register_model_builder from gordo.machine.model.factories.utils import hourglass_calc_dims, check_dim_func_len @@ -90,8 +90,7 @@ class (e.x. Adam(lr=0.01,beta_1=0.9, beta_2=0.999)). 
If no arguments are # output layer if isinstance(optimizer, str): - Optim = getattr(keras.optimizer_v1, optimizer) - optimizer = Optim(**optimizer_kwargs) + optimizer = keras.optimizers.get({"class_name": optimizer, "config": optimizer_kwargs}) model.add(Dense(units=n_features_out, activation=out_func)) From c520dbb04fad632baffba56cd65c2c5495945aec Mon Sep 17 00:00:00 2001 From: RollerKnobster Date: Tue, 25 Jun 2024 00:38:46 +0300 Subject: [PATCH 21/52] Fix: remove `save_format` param from `save_model` in `KerasBaseEstimator.__getstate__` --- gordo/machine/model/models.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gordo/machine/model/models.py b/gordo/machine/model/models.py index a36646c15..06a7b8d8c 100644 --- a/gordo/machine/model/models.py +++ b/gordo/machine/model/models.py @@ -179,7 +179,7 @@ def __getstate__(self): if hasattr(self, "model") and self.model is not None: buf = io.BytesIO() with h5py.File(buf, compression="lzf", mode="w") as h5: - save_model(self.model, h5, overwrite=True, save_format="h5") + save_model(self.model, h5, overwrite=True) buf.seek(0) state["model"] = buf if hasattr(self, "history"): From b93af57cc7593a8e75ef727ec5de747721310934 Mon Sep 17 00:00:00 2001 From: RollerKnobster Date: Tue, 25 Jun 2024 00:49:16 +0300 Subject: [PATCH 22/52] Fix: remove `save_format` param from `save_model` in `KerasBaseEstimator.__getstate__` --- gordo/machine/model/models.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gordo/machine/model/models.py b/gordo/machine/model/models.py index 06a7b8d8c..c230b75b6 100644 --- a/gordo/machine/model/models.py +++ b/gordo/machine/model/models.py @@ -179,7 +179,7 @@ def __getstate__(self): if hasattr(self, "model") and self.model is not None: buf = io.BytesIO() with h5py.File(buf, compression="lzf", mode="w") as h5: - save_model(self.model, h5, overwrite=True) + self.model.save(h5, overwrite=True, save_format="h5") buf.seek(0) state["model"] = buf if hasattr(self, "history"): 
From 3696b115e46a313630011a55842a67caf954beff Mon Sep 17 00:00:00 2001 From: RollerKnobster Date: Tue, 25 Jun 2024 00:52:39 +0300 Subject: [PATCH 23/52] Fix: formatting --- gordo/machine/model/factories/feedforward_autoencoder.py | 4 +++- gordo/machine/model/factories/lstm_autoencoder.py | 4 +++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/gordo/machine/model/factories/feedforward_autoencoder.py b/gordo/machine/model/factories/feedforward_autoencoder.py index 0c0ff44c7..1c4f0d791 100644 --- a/gordo/machine/model/factories/feedforward_autoencoder.py +++ b/gordo/machine/model/factories/feedforward_autoencoder.py @@ -88,7 +88,9 @@ class (e.x. Adam(lr=0.01,beta_1=0.9, beta_2=0.999)). If no arguments are # Instantiate optimizer with kwargs if isinstance(optimizer, str): - optimizer = keras.optimizers.get({"class_name": optimizer, "config": optimizer_kwargs}) + optimizer = keras.optimizers.get( + {"class_name": optimizer, "config": optimizer_kwargs} + ) # Final output layer model.add(Dense(n_features_out, activation=out_func)) diff --git a/gordo/machine/model/factories/lstm_autoencoder.py b/gordo/machine/model/factories/lstm_autoencoder.py index 02eb1eec1..1b43b4c1a 100644 --- a/gordo/machine/model/factories/lstm_autoencoder.py +++ b/gordo/machine/model/factories/lstm_autoencoder.py @@ -90,7 +90,9 @@ class (e.x. Adam(lr=0.01,beta_1=0.9, beta_2=0.999)). 
If no arguments are # output layer if isinstance(optimizer, str): - optimizer = keras.optimizers.get({"class_name": optimizer, "config": optimizer_kwargs}) + optimizer = keras.optimizers.get( + {"class_name": optimizer, "config": optimizer_kwargs} + ) model.add(Dense(units=n_features_out, activation=out_func)) From 92a537fffb8d85a207cfaee70e9c5c582b86d16e Mon Sep 17 00:00:00 2001 From: RollerKnobster Date: Tue, 25 Jun 2024 01:14:10 +0300 Subject: [PATCH 24/52] Fix: change saving to tempfile --- gordo/machine/model/models.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/gordo/machine/model/models.py b/gordo/machine/model/models.py index c230b75b6..23647325e 100644 --- a/gordo/machine/model/models.py +++ b/gordo/machine/model/models.py @@ -4,6 +4,7 @@ import logging import io import importlib +import tempfile from pprint import pformat from typing import Union, Callable, Dict, Any, Optional, Tuple from abc import ABCMeta @@ -177,11 +178,11 @@ def __getstate__(self): state = self.__dict__.copy() if hasattr(self, "model") and self.model is not None: - buf = io.BytesIO() - with h5py.File(buf, compression="lzf", mode="w") as h5: - self.model.save(h5, overwrite=True, save_format="h5") - buf.seek(0) - state["model"] = buf + with tempfile.NamedTemporaryFile(suffix='.h5') as tf: + with h5py.File(tf.name, compression="lzf", mode="w") as h5: + save_model(self.model, h5, overwrite=True) + tf.seek(0) + state["model"] = tf if hasattr(self, "history"): from tensorflow.python.keras.callbacks import History From 7c8c1403b90cf84e10e09e926320917c66cb0df1 Mon Sep 17 00:00:00 2001 From: RollerKnobster Date: Tue, 25 Jun 2024 01:17:41 +0300 Subject: [PATCH 25/52] Fix: formatting --- gordo/machine/model/models.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gordo/machine/model/models.py b/gordo/machine/model/models.py index 23647325e..4b5f944bc 100644 --- a/gordo/machine/model/models.py +++ b/gordo/machine/model/models.py @@ -178,7 
+178,7 @@ def __getstate__(self): state = self.__dict__.copy() if hasattr(self, "model") and self.model is not None: - with tempfile.NamedTemporaryFile(suffix='.h5') as tf: + with tempfile.NamedTemporaryFile(suffix=".h5") as tf: with h5py.File(tf.name, compression="lzf", mode="w") as h5: save_model(self.model, h5, overwrite=True) tf.seek(0) From 0150f0fd9c72951ecc9703e564ef04796e27c4ad Mon Sep 17 00:00:00 2001 From: RollerKnobster Date: Tue, 25 Jun 2024 01:31:06 +0300 Subject: [PATCH 26/52] Fix: save model as .keras temp file --- gordo/machine/model/models.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/gordo/machine/model/models.py b/gordo/machine/model/models.py index 4b5f944bc..c1ca31dd6 100644 --- a/gordo/machine/model/models.py +++ b/gordo/machine/model/models.py @@ -178,11 +178,10 @@ def __getstate__(self): state = self.__dict__.copy() if hasattr(self, "model") and self.model is not None: - with tempfile.NamedTemporaryFile(suffix=".h5") as tf: - with h5py.File(tf.name, compression="lzf", mode="w") as h5: - save_model(self.model, h5, overwrite=True) - tf.seek(0) - state["model"] = tf + with tempfile.NamedTemporaryFile("w", suffix=".keras") as tf: + save_model(self.model, tf, overwrite=True) + tf.seek(0) + state["model"] = tf if hasattr(self, "history"): from tensorflow.python.keras.callbacks import History From 8f738fb0aae234c85684803e811180e42b096bbf Mon Sep 17 00:00:00 2001 From: RollerKnobster Date: Tue, 25 Jun 2024 10:34:32 +0300 Subject: [PATCH 27/52] Fix: save model as .keras temp file --- gordo/machine/model/models.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gordo/machine/model/models.py b/gordo/machine/model/models.py index c1ca31dd6..41f172bd5 100644 --- a/gordo/machine/model/models.py +++ b/gordo/machine/model/models.py @@ -179,7 +179,7 @@ def __getstate__(self): if hasattr(self, "model") and self.model is not None: with tempfile.NamedTemporaryFile("w", suffix=".keras") as tf: - 
save_model(self.model, tf, overwrite=True) + save_model(self.model, tf.name, overwrite=True) tf.seek(0) state["model"] = tf if hasattr(self, "history"): From a8dbb8ced995fe53a5344bbc40eaf8e81017fdce Mon Sep 17 00:00:00 2001 From: RollerKnobster Date: Tue, 25 Jun 2024 10:49:08 +0300 Subject: [PATCH 28/52] Fix: save model as .keras temp file --- gordo/machine/model/models.py | 5 +++-- tests/gordo/serializer/test_serializer_from_definition.py | 2 +- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/gordo/machine/model/models.py b/gordo/machine/model/models.py index 41f172bd5..a46edf3f0 100644 --- a/gordo/machine/model/models.py +++ b/gordo/machine/model/models.py @@ -180,8 +180,9 @@ def __getstate__(self): if hasattr(self, "model") and self.model is not None: with tempfile.NamedTemporaryFile("w", suffix=".keras") as tf: save_model(self.model, tf.name, overwrite=True) - tf.seek(0) - state["model"] = tf + with open(tf.name, "rb") as inf: + buf = io.BytesIO(inf.read()) + state["model"] = buf if hasattr(self, "history"): from tensorflow.python.keras.callbacks import History diff --git a/tests/gordo/serializer/test_serializer_from_definition.py b/tests/gordo/serializer/test_serializer_from_definition.py index b97ee3e9c..61cc80d16 100644 --- a/tests/gordo/serializer/test_serializer_from_definition.py +++ b/tests/gordo/serializer/test_serializer_from_definition.py @@ -278,4 +278,4 @@ def _verify_pipe(self, pipe, model, model_kind): # STEP 4 TEST: Finally, the last step should be a KerasModel step4 = pipe.steps[3][1] self.assertIsInstance(step4, model) - self.assertTrue(step4.kind, model_kind) + self.assertTrue(step4._kind, model_kind) From 7f4e2b338b16440e1d33f279a43edd522d281a1f Mon Sep 17 00:00:00 2001 From: RollerKnobster Date: Tue, 25 Jun 2024 10:55:05 +0300 Subject: [PATCH 29/52] Fix: load model as .keras temp file --- gordo/machine/model/models.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/gordo/machine/model/models.py 
b/gordo/machine/model/models.py index a46edf3f0..1069c11a9 100644 --- a/gordo/machine/model/models.py +++ b/gordo/machine/model/models.py @@ -195,8 +195,7 @@ def __getstate__(self): def __setstate__(self, state): if "model" in state: - with h5py.File(state["model"], compression="lzf", mode="r") as h5: - state["model"] = load_model(h5, compile=False) + state["model"] = load_model(state["model"], compile=False) self.__dict__ = state return self From 5920784efe5e62303f7b849058ab92c5105942cd Mon Sep 17 00:00:00 2001 From: RollerKnobster Date: Tue, 25 Jun 2024 11:02:06 +0300 Subject: [PATCH 30/52] Fix: load model as .keras temp file --- gordo/machine/model/models.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/gordo/machine/model/models.py b/gordo/machine/model/models.py index 1069c11a9..62dbd3fd0 100644 --- a/gordo/machine/model/models.py +++ b/gordo/machine/model/models.py @@ -195,7 +195,9 @@ def __getstate__(self): def __setstate__(self, state): if "model" in state: - state["model"] = load_model(state["model"], compile=False) + with tempfile.NamedTemporaryFile("w", suffix=".keras") as tf: + tf.write(state["model"]) + state["model"] = load_model(tf.name, compile=False) self.__dict__ = state return self From f138c273abe4c90088cd74e10f7b2dc9f2576330 Mon Sep 17 00:00:00 2001 From: RollerKnobster Date: Tue, 25 Jun 2024 11:07:38 +0300 Subject: [PATCH 31/52] Fix: load model as .keras temp file --- gordo/machine/model/models.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gordo/machine/model/models.py b/gordo/machine/model/models.py index 62dbd3fd0..519306ee8 100644 --- a/gordo/machine/model/models.py +++ b/gordo/machine/model/models.py @@ -195,7 +195,7 @@ def __getstate__(self): def __setstate__(self, state): if "model" in state: - with tempfile.NamedTemporaryFile("w", suffix=".keras") as tf: + with tempfile.NamedTemporaryFile("wb", suffix=".keras") as tf: tf.write(state["model"]) state["model"] = load_model(tf.name, 
compile=False) self.__dict__ = state From c6000a2d839745438ccaa3e1ae383e03aed89bc3 Mon Sep 17 00:00:00 2001 From: RollerKnobster Date: Tue, 25 Jun 2024 11:13:14 +0300 Subject: [PATCH 32/52] Fix: adjust test for argo versions --- tests/gordo/workflow/test_helpers.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/tests/gordo/workflow/test_helpers.py b/tests/gordo/workflow/test_helpers.py index 56919193e..78ef29a01 100644 --- a/tests/gordo/workflow/test_helpers.py +++ b/tests/gordo/workflow/test_helpers.py @@ -3,6 +3,7 @@ import pytest from mock import patch from packaging import version +from packaging.version import InvalidVersion from gordo.workflow.workflow_generator.helpers import ( determine_argo_version, @@ -17,7 +18,11 @@ def test_parse_argo_version(): parsed_version = parse_argo_version("2.12.11") assert type(parsed_version) is version.Version assert str(parsed_version) == "2.12.11" - assert parse_argo_version("wrong_version") is None + + +def test_parse_argo_version_error(): + with pytest.raises(InvalidVersion): + parse_argo_version("wrong_version") def create_completed_process(return_code, stdout): From dd2f98a95b1adad0cebc09cec3b7f35f15ef78b3 Mon Sep 17 00:00:00 2001 From: RollerKnobster Date: Tue, 25 Jun 2024 11:19:13 +0300 Subject: [PATCH 33/52] Fix: save bytes instead of bytesio to model state --- gordo/machine/model/models.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/gordo/machine/model/models.py b/gordo/machine/model/models.py index 519306ee8..b78abdaa7 100644 --- a/gordo/machine/model/models.py +++ b/gordo/machine/model/models.py @@ -181,8 +181,7 @@ def __getstate__(self): with tempfile.NamedTemporaryFile("w", suffix=".keras") as tf: save_model(self.model, tf.name, overwrite=True) with open(tf.name, "rb") as inf: - buf = io.BytesIO(inf.read()) - state["model"] = buf + state["model"] = inf.read() if hasattr(self, "history"): from tensorflow.python.keras.callbacks import History From 
0c2aef81114de38fc06e580f1f10902db3419b1c Mon Sep 17 00:00:00 2001 From: RollerKnobster Date: Tue, 25 Jun 2024 12:11:02 +0300 Subject: [PATCH 34/52] Fix: skip loading uninitialized model from state This is due to the fact that `TransformedTargetRegressor` receives a `regressor` as input, then clones it for every `fit` and sets it to `regressor_`, leaving the original `regressor` uninitialized forever. --- gordo/machine/model/models.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gordo/machine/model/models.py b/gordo/machine/model/models.py index b78abdaa7..10d169083 100644 --- a/gordo/machine/model/models.py +++ b/gordo/machine/model/models.py @@ -193,7 +193,7 @@ def __getstate__(self): return state def __setstate__(self, state): - if "model" in state: + if "model" in state and state["model"] is not None: with tempfile.NamedTemporaryFile("wb", suffix=".keras") as tf: tf.write(state["model"]) state["model"] = load_model(tf.name, compile=False) From c2dbc6b2df54011f0335e7fa91ee3e8822c9369c Mon Sep 17 00:00:00 2001 From: RollerKnobster Date: Tue, 25 Jun 2024 12:26:07 +0300 Subject: [PATCH 35/52] Fix: adjust test for serializer --- tests/gordo/serializer/test_serializer_into_definition.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/gordo/serializer/test_serializer_into_definition.py b/tests/gordo/serializer/test_serializer_into_definition.py index 95641bc91..941856913 100644 --- a/tests/gordo/serializer/test_serializer_into_definition.py +++ b/tests/gordo/serializer/test_serializer_into_definition.py @@ -174,6 +174,7 @@ def test_into_definition(variations_of_same_pipeline): - gordo.machine.model.models.KerasAutoEncoder: kind: feedforward_hourglass verbose: false + verbose_feature_names_out: true """ expected_definition = yaml.safe_load(expected_definition) From 0ca65e310fc8f3103cba38129e97fe25776fe8b0 Mon Sep 17 00:00:00 2001 From: RollerKnobster Date: Tue, 25 Jun 2024 12:28:04 +0300 Subject: [PATCH 36/52] Fix: adjust test for serializer
--- tests/gordo/serializer/test_serializer_into_definition.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/gordo/serializer/test_serializer_into_definition.py b/tests/gordo/serializer/test_serializer_into_definition.py index 941856913..69649ab27 100644 --- a/tests/gordo/serializer/test_serializer_into_definition.py +++ b/tests/gordo/serializer/test_serializer_into_definition.py @@ -171,10 +171,10 @@ def test_into_definition(variations_of_same_pipeline): verbose: false transformer_weights: null verbose: false + verbose_feature_names_out: true - gordo.machine.model.models.KerasAutoEncoder: kind: feedforward_hourglass verbose: false - verbose_feature_names_out: true """ expected_definition = yaml.safe_load(expected_definition) From 8432d16963e8e9f4e7cbfbbc7c2d6985581a9c38 Mon Sep 17 00:00:00 2001 From: RollerKnobster Date: Tue, 25 Jun 2024 12:35:36 +0300 Subject: [PATCH 37/52] Fix: rename `KerasLSTMBaseEstimator` attributes to underscored prefixed --- gordo/machine/model/models.py | 24 +++++++++++------------- 1 file changed, 11 insertions(+), 13 deletions(-) diff --git a/gordo/machine/model/models.py b/gordo/machine/model/models.py index 10d169083..a9e033946 100644 --- a/gordo/machine/model/models.py +++ b/gordo/machine/model/models.py @@ -2,7 +2,6 @@ import abc import logging -import io import importlib import tempfile from pprint import pformat @@ -11,7 +10,6 @@ from copy import copy, deepcopy from importlib.util import find_spec -import h5py import tensorflow.keras.models from tensorflow.keras.models import load_model, save_model from tensorflow.keras.preprocessing.sequence import pad_sequences, TimeseriesGenerator @@ -481,11 +479,11 @@ def __init__( Any arguments which are passed to the factory building function and/or any additional args to be passed to the intermediate fit method. 
""" - self.lookback_window = lookback_window - self.batch_size = batch_size - kwargs["lookback_window"] = lookback_window - kwargs["kind"] = kind - kwargs["batch_size"] = batch_size + self._lookback_window = lookback_window + self._batch_size = batch_size + kwargs["_lookback_window"] = lookback_window + kwargs["_kind"] = kind + kwargs["_batch_size"] = batch_size # fit_generator_params is a set of strings with the keyword arguments of # Keras fit_generator method (excluding "shuffle" as this will be hardcoded). @@ -535,7 +533,7 @@ def _validate_and_fix_size_of_X(self, X): ) X = X.reshape(len(X), 1) - if self.lookback_window >= X.shape[0]: + if self._lookback_window >= X.shape[0]: raise ValueError( "For KerasLSTMForecast lookback_window must be < size of X" ) @@ -571,11 +569,11 @@ def fit( # type: ignore # model using the scikit-learn wrapper. tsg = create_keras_timeseriesgenerator( X=X[ - : self.lookahead + self.lookback_window + : self.lookahead + self._lookback_window ], # We only need a bit of the data - y=y[: self.lookahead + self.lookback_window], + y=y[: self.lookahead + self._lookback_window], batch_size=1, - lookback_window=self.lookback_window, + lookback_window=self._lookback_window, lookahead=self.lookahead, ) @@ -587,7 +585,7 @@ def fit( # type: ignore X=X, y=y, batch_size=self.batch_size, - lookback_window=self.lookback_window, + lookback_window=self._lookback_window, lookahead=self.lookahead, ) @@ -640,7 +638,7 @@ def predict(self, X: np.ndarray, **kwargs) -> np.ndarray: X=X, y=X, batch_size=10000, - lookback_window=self.lookback_window, + lookback_window=self._lookback_window, lookahead=self.lookahead, ) kwargs.setdefault("verbose", 0) From 45155e4cf8463df47893a3c4efa9330875b69f51 Mon Sep 17 00:00:00 2001 From: RollerKnobster Date: Tue, 25 Jun 2024 12:40:17 +0300 Subject: [PATCH 38/52] Fix: rename `KerasLSTMBaseEstimator` attributes to underscored prefixed --- gordo/machine/model/models.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) 
diff --git a/gordo/machine/model/models.py b/gordo/machine/model/models.py index a9e033946..c8a23fec3 100644 --- a/gordo/machine/model/models.py +++ b/gordo/machine/model/models.py @@ -481,9 +481,9 @@ def __init__( """ self._lookback_window = lookback_window self._batch_size = batch_size - kwargs["_lookback_window"] = lookback_window - kwargs["_kind"] = kind - kwargs["_batch_size"] = batch_size + kwargs["lookback_window"] = lookback_window + kwargs["kind"] = kind + kwargs["batch_size"] = batch_size # fit_generator_params is a set of strings with the keyword arguments of # Keras fit_generator method (excluding "shuffle" as this will be hardcoded). From a7ba4fcc4df1e193a87288cbf821042d6d0e1631 Mon Sep 17 00:00:00 2001 From: RollerKnobster Date: Tue, 25 Jun 2024 14:10:32 +0300 Subject: [PATCH 39/52] Fix: do not propagate kwargs to `super().__init__` of KerasBaseEstimator --- gordo/machine/model/models.py | 42 +++++++++++++++++------------------ 1 file changed, 21 insertions(+), 21 deletions(-) diff --git a/gordo/machine/model/models.py b/gordo/machine/model/models.py index c8a23fec3..c8c714b56 100644 --- a/gordo/machine/model/models.py +++ b/gordo/machine/model/models.py @@ -76,11 +76,11 @@ def __init__( building function and/or any additional args to be passed to Keras' fit() method """ - self._kind = self.load_kind(kind) - self._kwargs: Dict[str, Any] = kwargs + self.kind = self.load_kind(kind) + self.kwargs: Dict[str, Any] = kwargs self._history = None - super().__init__(**kwargs) + super().__init__() @staticmethod def parse_module_path(module_path) -> Tuple[Optional[str], str]: @@ -155,21 +155,21 @@ def into_definition(self) -> dict: ------- """ - definition = copy(self._kwargs) - definition["kind"] = self._kind + definition = copy(self.kwargs) + definition["kind"] = self.kind return definition @property def sk_params(self): """ Parameters used for scikit learn kwargs""" - fit_args = self.extract_supported_fit_args(self._kwargs) + fit_args = 
self.extract_supported_fit_args(self.kwargs) if fit_args: - kwargs = deepcopy(self._kwargs) + kwargs = deepcopy(self.kwargs) kwargs.update(serializer.load_params_from_definition(fit_args)) return kwargs else: - return self._kwargs + return self.kwargs def __getstate__(self): @@ -255,7 +255,7 @@ def fit( y = y.reshape(-1, 1) logger.debug(f"Fitting to data of length: {len(X)}") - self._kwargs.update( + self.kwargs.update( { "n_features": self.get_n_features(X), "n_features_out": self.get_n_features_out(y), @@ -303,21 +303,21 @@ def get_params(self, **params): """ params = super().get_params(**params) params.pop("model", None) - params.update({"kind": self._kind}) - params.update(self._kwargs) + params.update({"kind": self.kind}) + params.update(self.kwargs) return params def _prepare_model(self): - module_name, class_name = self.parse_module_path(self._kind) + module_name, class_name = self.parse_module_path(self.kind) if module_name is None: factories = register_model_builder.factories[self.__class__.__name__] - model = factories[self._kind] + model = factories[self.kind] else: module = importlib.import_module(module_name) if not hasattr(module, class_name): raise ValueError( "kind: %s, unable to find class %s in module '%s'" - % (self._kind, class_name, module_name) + % (self.kind, class_name, module_name) ) model = getattr(module, class_name) self.model = model(**self.sk_params) @@ -425,20 +425,20 @@ def load_kind(self, kind): return kind def __repr__(self): - return f"{self.__class__.__name__}(kind: {pformat(self._kind)})" + return f"{self.__class__.__name__}(kind: {pformat(self.kind)})" def _prepare_model(self): """Build Keras model from specification""" - if not all(k in self._kind for k in self._expected_keys): + if not all(k in self.kind for k in self._expected_keys): raise ValueError( - f"Expected spec to have keys: {self._expected_keys}, but found {self._kind.keys()}" + f"Expected spec to have keys: {self._expected_keys}, but found {self.kind.keys()}" ) - 
logger.debug(f"Building model from spec: {self._kind}") + logger.debug(f"Building model from spec: {self.kind}") - model = serializer.from_definition(self._kind["spec"]) + model = serializer.from_definition(self.kind["spec"]) # Load any compile kwargs as well, such as compile.optimizer which may map to class obj - kwargs = serializer.from_definition(self._kind["compile"]) + kwargs = serializer.from_definition(self.kind["compile"]) model.compile(**kwargs) return model @@ -591,7 +591,7 @@ def fit( # type: ignore gen_kwargs = { k: v - for k, v in {**self._kwargs, **kwargs}.items() + for k, v in {**self.kwargs, **kwargs}.items() if k in self.fit_generator_params } From c97c5a5b8fa94366d43d049a6565fe476ec07765 Mon Sep 17 00:00:00 2001 From: RollerKnobster Date: Tue, 25 Jun 2024 14:34:10 +0300 Subject: [PATCH 40/52] Fix: do not propagate kwargs to `super().__init__` of KerasBaseEstimator --- gordo/machine/model/models.py | 6 +++--- gordo/serializer/from_definition.py | 2 +- tests/gordo/test_version.py | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/gordo/machine/model/models.py b/gordo/machine/model/models.py index c8c714b56..153b77044 100644 --- a/gordo/machine/model/models.py +++ b/gordo/machine/model/models.py @@ -80,7 +80,7 @@ def __init__( self.kwargs: Dict[str, Any] = kwargs self._history = None - super().__init__() + KerasRegressor.__init__(self) @staticmethod def parse_module_path(module_path) -> Tuple[Optional[str], str]: @@ -479,8 +479,8 @@ def __init__( Any arguments which are passed to the factory building function and/or any additional args to be passed to the intermediate fit method. 
""" - self._lookback_window = lookback_window - self._batch_size = batch_size + self.lookback_window = lookback_window + self.batch_size = batch_size kwargs["lookback_window"] = lookback_window kwargs["kind"] = kind kwargs["batch_size"] = batch_size diff --git a/gordo/serializer/from_definition.py b/gordo/serializer/from_definition.py index fa344e253..8e748a12b 100644 --- a/gordo/serializer/from_definition.py +++ b/gordo/serializer/from_definition.py @@ -259,7 +259,7 @@ def _build_callbacks(definitions: list): -------- >>> callbacks=_build_callbacks([{'tensorflow.keras.callbacks.EarlyStopping': {'monitor': 'val_loss,', 'patience': 10}}]) >>> type(callbacks[0]) - + Returns ------- diff --git a/tests/gordo/test_version.py b/tests/gordo/test_version.py index 90e0a5b98..8feda84d3 100644 --- a/tests/gordo/test_version.py +++ b/tests/gordo/test_version.py @@ -18,7 +18,7 @@ def test_version(): "version,expected", [ ("1.1.1", (1, 1, False)), - ("1.1.1.dev-a1", (1, 1, True)), + ("1.1.1.dev-1", (1, 1, True)), ("0.55.0-rc1", (0, 55, True)), ("0.0.0", (0, 0, True)), ], From ba15f7589a63201d41b61e89330bdd485dfca808 Mon Sep 17 00:00:00 2001 From: RollerKnobster Date: Tue, 25 Jun 2024 14:34:54 +0300 Subject: [PATCH 41/52] Fix: do not propagate kwargs to `super().__init__` of KerasBaseEstimator --- gordo/machine/model/models.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/gordo/machine/model/models.py b/gordo/machine/model/models.py index 153b77044..39f52f4a0 100644 --- a/gordo/machine/model/models.py +++ b/gordo/machine/model/models.py @@ -533,7 +533,7 @@ def _validate_and_fix_size_of_X(self, X): ) X = X.reshape(len(X), 1) - if self._lookback_window >= X.shape[0]: + if self.lookback_window >= X.shape[0]: raise ValueError( "For KerasLSTMForecast lookback_window must be < size of X" ) @@ -569,11 +569,11 @@ def fit( # type: ignore # model using the scikit-learn wrapper. 
tsg = create_keras_timeseriesgenerator( X=X[ - : self.lookahead + self._lookback_window + : self.lookahead + self.lookback_window ], # We only need a bit of the data - y=y[: self.lookahead + self._lookback_window], + y=y[: self.lookahead + self.lookback_window], batch_size=1, - lookback_window=self._lookback_window, + lookback_window=self.lookback_window, lookahead=self.lookahead, ) @@ -585,7 +585,7 @@ def fit( # type: ignore X=X, y=y, batch_size=self.batch_size, - lookback_window=self._lookback_window, + lookback_window=self.lookback_window, lookahead=self.lookahead, ) @@ -638,7 +638,7 @@ def predict(self, X: np.ndarray, **kwargs) -> np.ndarray: X=X, y=X, batch_size=10000, - lookback_window=self._lookback_window, + lookback_window=self.lookback_window, lookahead=self.lookahead, ) kwargs.setdefault("verbose", 0) From 17ad3862637aadd8b2b76d372391c7d76234f2e3 Mon Sep 17 00:00:00 2001 From: RollerKnobster Date: Tue, 25 Jun 2024 14:38:23 +0300 Subject: [PATCH 42/52] Fix: do not propagate kwargs to `super().__init__` of KerasBaseEstimator --- tests/gordo/serializer/test_serializer_from_definition.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/gordo/serializer/test_serializer_from_definition.py b/tests/gordo/serializer/test_serializer_from_definition.py index 61cc80d16..b97ee3e9c 100644 --- a/tests/gordo/serializer/test_serializer_from_definition.py +++ b/tests/gordo/serializer/test_serializer_from_definition.py @@ -278,4 +278,4 @@ def _verify_pipe(self, pipe, model, model_kind): # STEP 4 TEST: Finally, the last step should be a KerasModel step4 = pipe.steps[3][1] self.assertIsInstance(step4, model) - self.assertTrue(step4._kind, model_kind) + self.assertTrue(step4.kind, model_kind) From 033d2df2e5e0affbac5de2ef12009bde749a25dc Mon Sep 17 00:00:00 2001 From: RollerKnobster Date: Tue, 25 Jun 2024 15:40:43 +0300 Subject: [PATCH 43/52] Fix: propagate batch_size to `super().__init__` of KerasBaseEstimator --- gordo/machine/model/models.py | 3 +-- 
1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/gordo/machine/model/models.py b/gordo/machine/model/models.py index 39f52f4a0..30ef7749b 100644 --- a/gordo/machine/model/models.py +++ b/gordo/machine/model/models.py @@ -80,7 +80,7 @@ def __init__( self.kwargs: Dict[str, Any] = kwargs self._history = None - KerasRegressor.__init__(self) + KerasRegressor.__init__(self, batch_size=kwargs.get("batch_size")) @staticmethod def parse_module_path(module_path) -> Tuple[Optional[str], str]: @@ -480,7 +480,6 @@ def __init__( additional args to be passed to the intermediate fit method. """ self.lookback_window = lookback_window - self.batch_size = batch_size kwargs["lookback_window"] = lookback_window kwargs["kind"] = kind kwargs["batch_size"] = batch_size From aca16308af0cf383ec980f0dac7674b2d38a8cdc Mon Sep 17 00:00:00 2001 From: RollerKnobster Date: Tue, 25 Jun 2024 17:49:09 +0300 Subject: [PATCH 44/52] Fix: set `input_shape` to tensorflow layers definition in `KerasRawModelRegressor` docstring --- gordo/machine/model/models.py | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/gordo/machine/model/models.py b/gordo/machine/model/models.py index 30ef7749b..a220e60bb 100644 --- a/gordo/machine/model/models.py +++ b/gordo/machine/model/models.py @@ -405,8 +405,12 @@ class KerasRawModelRegressor(KerasAutoEncoder): ... layers: ... - tensorflow.keras.layers.Dense: ... units: 4 + ... input_shape: + ... - 4 ... - tensorflow.keras.layers.Dense: ... units: 1 + ... input_shape: + ... - 1 ... 
''' >>> config = yaml.safe_load(config_str) >>> model = KerasRawModelRegressor(kind=config) @@ -414,8 +418,10 @@ class KerasRawModelRegressor(KerasAutoEncoder): >>> X, y = np.random.random((10, 4)), np.random.random((10, 1)) >>> model.fit(X, y, verbose=0) KerasRawModelRegressor(kind: {'compile': {'loss': 'mse', 'optimizer': 'adam'}, - 'spec': {'tensorflow.keras.models.Sequential': {'layers': [{'tensorflow.keras.layers.Dense': {'units': 4}}, - {'tensorflow.keras.layers.Dense': {'units': 1}}]}}}) + 'spec': {'tensorflow.keras.models.Sequential': {'layers': [{'tensorflow.keras.layers.Dense': {'input_shape': [4], + 'units': 4}}, + {'tensorflow.keras.layers.Dense': {'input_shape': [1], + 'units': 1}}]}}}) >>> out = model.predict(X) """ @@ -439,9 +445,9 @@ def _prepare_model(self): # Load any compile kwargs as well, such as compile.optimizer which may map to class obj kwargs = serializer.from_definition(self.kind["compile"]) - model.compile(**kwargs) - return model + + self.model = model class KerasLSTMBaseEstimator(KerasBaseEstimator, TransformerMixin, metaclass=ABCMeta): From 96ba313f36fae5c997b8c6aaaccae0716a8e2de6 Mon Sep 17 00:00:00 2001 From: RollerKnobster Date: Tue, 25 Jun 2024 18:32:57 +0300 Subject: [PATCH 45/52] Fix: store history for model in `_history` and use proper `regressor_` in `_extract_metadata_from_model` --- gordo/builder/build_model.py | 15 ++++++++------- gordo/machine/model/models.py | 14 +++++++------- 2 files changed, 15 insertions(+), 14 deletions(-) diff --git a/gordo/builder/build_model.py b/gordo/builder/build_model.py index d7d6109ab..5e4a9f2d2 100644 --- a/gordo/builder/build_model.py +++ b/gordo/builder/build_model.py @@ -555,13 +555,14 @@ def _extract_metadata_from_model( # Continue to look at object values in case, we decided to have a GordoBase # which also had a GordoBase as a parameter/attribute, but will satisfy BaseEstimators # which can take a GordoBase model as a parameter, which will then have metadata to get - for val in 
model.__dict__.values(): - if isinstance(val, Pipeline): - metadata.update( - ModelBuilder._extract_metadata_from_model(val.steps[-1][1]) - ) - elif isinstance(val, GordoBase) or isinstance(val, BaseEstimator): - metadata.update(ModelBuilder._extract_metadata_from_model(val)) + for key, val in model.__dict__.items(): + if key.endswith('_'): # keras3 clones the regressor into regressor_ and never updates original regressor + if isinstance(val, Pipeline): + metadata.update( + ModelBuilder._extract_metadata_from_model(val.steps[-1][1]) + ) + elif isinstance(val, GordoBase) or isinstance(val, BaseEstimator): + metadata.update(ModelBuilder._extract_metadata_from_model(val)) return metadata @property diff --git a/gordo/machine/model/models.py b/gordo/machine/model/models.py index a220e60bb..1c7502afa 100644 --- a/gordo/machine/model/models.py +++ b/gordo/machine/model/models.py @@ -180,7 +180,7 @@ def __getstate__(self): save_model(self.model, tf.name, overwrite=True) with open(tf.name, "rb") as inf: state["model"] = inf.read() - if hasattr(self, "history"): + if hasattr(self, "_history"): from tensorflow.python.keras.callbacks import History history = History() @@ -270,9 +270,9 @@ def fit( if self.model is None: self._prepare_model() - history = super().fit(X, y, sample_weight=None, **kwargs) - if isinstance(history, KerasRegressor): - self._history = history.history_ + model = super().fit(X, y, sample_weight=None, **kwargs) + if isinstance(model, KerasRegressor): + self._history = model.model.history return self def predict(self, X: np.ndarray, **kwargs) -> np.ndarray: @@ -335,9 +335,9 @@ def get_metadata(self): ------- Metadata dictionary, including a history object if present """ - if hasattr(self, "model") and hasattr(self, "history"): - history = self.history.history - history["params"] = self.history.params + if hasattr(self, "model") and hasattr(self, "_history"): + history = self._history.history + history["params"] = self._history.params return {"history": 
history} else: return {} From 796f9a1c6cbef9beee0831206e9da9bc0796b2b4 Mon Sep 17 00:00:00 2001 From: RollerKnobster Date: Tue, 25 Jun 2024 18:36:34 +0300 Subject: [PATCH 46/52] Fix: formatting --- gordo/builder/build_model.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/gordo/builder/build_model.py b/gordo/builder/build_model.py index 5e4a9f2d2..73eb358de 100644 --- a/gordo/builder/build_model.py +++ b/gordo/builder/build_model.py @@ -556,7 +556,9 @@ def _extract_metadata_from_model( # which also had a GordoBase as a parameter/attribute, but will satisfy BaseEstimators # which can take a GordoBase model as a parameter, which will then have metadata to get for key, val in model.__dict__.items(): - if key.endswith('_'): # keras3 clones the regressor into regressor_ and never updates original regressor + if key.endswith( + "_" + ): # keras3 clones the regressor into regressor_ and never updates original regressor if isinstance(val, Pipeline): metadata.update( ModelBuilder._extract_metadata_from_model(val.steps[-1][1]) From e25a71e1cf1ef2892b13b6f3f92a4a3821432753 Mon Sep 17 00:00:00 2001 From: RollerKnobster Date: Wed, 26 Jun 2024 10:08:27 +0300 Subject: [PATCH 47/52] Fix: adjust `model` and `history` attributes access in `KerasBaseEstimator` and children Also adjust tests --- gordo/machine/model/models.py | 24 +++++++++---------- .../machine/model/test_lstm_autoencoder.py | 10 ++++---- tests/gordo/machine/model/test_model.py | 6 ++--- tests/gordo/machine/model/test_raw_keras.py | 5 ++-- 4 files changed, 23 insertions(+), 22 deletions(-) diff --git a/gordo/machine/model/models.py b/gordo/machine/model/models.py index 1c7502afa..65e1a543a 100644 --- a/gordo/machine/model/models.py +++ b/gordo/machine/model/models.py @@ -175,19 +175,19 @@ def __getstate__(self): state = self.__dict__.copy() - if hasattr(self, "model") and self.model is not None: + if self.model is not None: with tempfile.NamedTemporaryFile("w", suffix=".keras") as tf: 
save_model(self.model, tf.name, overwrite=True) with open(tf.name, "rb") as inf: state["model"] = inf.read() - if hasattr(self, "_history"): - from tensorflow.python.keras.callbacks import History - - history = History() - history.history = self._history.history - history.params = self._history.params - history.epoch = self._history.epoch - state["history"] = history + + from tensorflow.python.keras.callbacks import History + + history = History() + history.history = self._history.history + history.params = self._history.params + history.epoch = self._history.epoch + state["history"] = history return state def __setstate__(self, state): @@ -335,7 +335,7 @@ def get_metadata(self): ------- Metadata dictionary, including a history object if present """ - if hasattr(self, "model") and hasattr(self, "_history"): + if self._history is not None: history = self._history.history history["params"] = self._history.params return {"history": history} @@ -373,7 +373,7 @@ def score( ------- Returns the explained variance score """ - if not hasattr(self, "model"): + if self.model is None: raise NotFittedError( f"This {self.__class__.__name__} has not been fitted yet." ) @@ -675,7 +675,7 @@ def score( ------- Returns the explained variance score. """ - if not hasattr(self, "model"): + if self.model is None: raise NotFittedError( f"This {self.__class__.__name__} has not been fitted yet." ) diff --git a/tests/gordo/machine/model/test_lstm_autoencoder.py b/tests/gordo/machine/model/test_lstm_autoencoder.py index de411c433..cbcb674d6 100644 --- a/tests/gordo/machine/model/test_lstm_autoencoder.py +++ b/tests/gordo/machine/model/test_lstm_autoencoder.py @@ -127,22 +127,22 @@ def test_lstm_symmetric_basic(n_features, n_features_out): funcs=("relu", "relu", "tanh", "tanh"), out_func="linear", optimizer="SGD", - optimizer_kwargs={"lr": 0.01}, + optimizer_kwargs={"learning_rate": 0.01}, loss="mse", ) # Ensure that the input dimension to Keras model matches the number of features. 
- assert model.layers[0].input_shape[2] == n_features + assert model.layers[0].input.shape[2] == n_features # Ensure that the dimension of each encoding layer matches the expected dimension. - assert [model.layers[i].input_shape[2] for i in range(1, 5)] == [4, 3, 2, 1] + assert [model.layers[i].input.shape[2] for i in range(1, 5)] == [4, 3, 2, 1] # Ensure that the dimension of each decoding layer (excluding last decoding layer) # matches the expected dimension. - assert [model.layers[i].input_shape[2] for i in range(5, 8)] == [1, 2, 3] + assert [model.layers[i].input.shape[2] for i in range(5, 8)] == [1, 2, 3] # Ensure that the dimension of last decoding layer matches the expected dimension. - assert model.layers[8].input_shape[1] == 4 + assert model.layers[8].input.shape[1] == 4 # Ensure activation functions in the encoding part (layers 0-3) # match expected activation functions. diff --git a/tests/gordo/machine/model/test_model.py b/tests/gordo/machine/model/test_model.py index f068b35f1..46f0ec155 100644 --- a/tests/gordo/machine/model/test_model.py +++ b/tests/gordo/machine/model/test_model.py @@ -145,15 +145,15 @@ def test_save_load(model, kind): # Assert that epochs list, history dict and params dict in # the History object are the same assert ( - model_out.history.epoch == model_out_clone.history.epoch + model_out._history.epoch == model_out_clone._history.epoch ), "Epoch lists differ between original and loaded model history" assert ( - model_out.history.history == model_out_clone.history.history + model_out._history.history == model_out_clone._history.history ), "History dictionary with losses and accuracies differ between original and loaded model history" assert ( - model_out.history.params == model_out_clone.history.params + model_out._history.params == model_out_clone._history.params ), "Params dictionaries differ between original and loaded model history" diff --git a/tests/gordo/machine/model/test_raw_keras.py 
b/tests/gordo/machine/model/test_raw_keras.py index f8059fa76..f6a8290a6 100644 --- a/tests/gordo/machine/model/test_raw_keras.py +++ b/tests/gordo/machine/model/test_raw_keras.py @@ -55,8 +55,9 @@ def test_raw_keras_basic(spec_str: str): """ spec = yaml.safe_load(spec_str) pipe = KerasRawModelRegressor(spec) - model = pipe() - assert isinstance(model, tf.keras.models.Sequential) + pipe._prepare_model() + + assert isinstance(pipe.model, tf.keras.models.Sequential) def test_raw_keras_part_of_pipeline(): From 6852cbc8e93c2ed67ff8b4c34de9faf85743a4b8 Mon Sep 17 00:00:00 2001 From: RollerKnobster Date: Wed, 26 Jun 2024 10:09:47 +0300 Subject: [PATCH 48/52] Fix: adjust `lstm` `optimizer_kwargs` and `input.shape` access --- tests/gordo/machine/model/test_lstm_autoencoder.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/tests/gordo/machine/model/test_lstm_autoencoder.py b/tests/gordo/machine/model/test_lstm_autoencoder.py index cbcb674d6..a4704e943 100644 --- a/tests/gordo/machine/model/test_lstm_autoencoder.py +++ b/tests/gordo/machine/model/test_lstm_autoencoder.py @@ -54,24 +54,24 @@ def test_lstm_hourglass_basic(self): func="tanh", out_func="relu", optimizer="SGD", - optimizer_kwargs={"lr": 0.02, "momentum": 0.001}, + optimizer_kwargs={"learning_rate": 0.02, "momentum": 0.001}, compile_kwargs={"loss": "mae"}, ) # Ensure that the input dimension to Keras model matches the number of features. - self.assertEqual(model.layers[0].input_shape[2], 3) + self.assertEqual(model.layers[0].input.shape[2], 3) # Ensure that the dimension of each encoding layer matches the expected dimension. self.assertEqual( - [model.layers[i].input_shape[2] for i in range(1, 4)], [3, 2, 2] + [model.layers[i].input.shape[2] for i in range(1, 4)], [3, 2, 2] ) # Ensure that the dimension of each decoding layer (excluding last decoding layer) # matches the expected dimension. 
- self.assertEqual([model.layers[i].input_shape[2] for i in range(4, 6)], [2, 2]) + self.assertEqual([model.layers[i].input.shape[2] for i in range(4, 6)], [2, 2]) # Ensure that the dimension of last decoding layer matches the expected dimension. - self.assertEqual(model.layers[6].input_shape[1], 3) + self.assertEqual(model.layers[6].input.shape[1], 3) # Ensure activation functions in the encoding part (layers 0-2) # match expected activation functions From 6d827e6f7d74e3a5a527381c7cd05fa510a13d53 Mon Sep 17 00:00:00 2001 From: RollerKnobster Date: Wed, 26 Jun 2024 10:20:26 +0300 Subject: [PATCH 49/52] Fix: add input_shape to Dense layers in kerasraw test --- tests/gordo/machine/model/test_raw_keras.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tests/gordo/machine/model/test_raw_keras.py b/tests/gordo/machine/model/test_raw_keras.py index f6a8290a6..e3f179196 100644 --- a/tests/gordo/machine/model/test_raw_keras.py +++ b/tests/gordo/machine/model/test_raw_keras.py @@ -81,8 +81,12 @@ def test_raw_keras_part_of_pipeline(): layers: - tensorflow.keras.layers.Dense: units: 4 + input_shape: + - 4 - tensorflow.keras.layers.Dense: units: 1 + input_shape: + - 1 """ config = yaml.safe_load(config_str) pipe = serializer.from_definition(config) From ad19212e545cddc214db3b566c6198e8ee1fbe12 Mon Sep 17 00:00:00 2001 From: RollerKnobster Date: Mon, 8 Jul 2024 14:28:07 +0300 Subject: [PATCH 50/52] Formatting --- gordo/machine/model/anomaly/diff.py | 36 +++++++++---------- gordo/util/version.py | 3 +- .../config_elements/normalized_config.py | 8 +++-- 3 files changed, 25 insertions(+), 22 deletions(-) diff --git a/gordo/machine/model/anomaly/diff.py b/gordo/machine/model/anomaly/diff.py index e3b1f6f5d..051610d21 100644 --- a/gordo/machine/model/anomaly/diff.py +++ b/gordo/machine/model/anomaly/diff.py @@ -95,13 +95,13 @@ def get_metadata(self): if hasattr(self, "aggregate_threshold_"): metadata["aggregate-threshold"] = self.aggregate_threshold_ if hasattr(self, 
"feature_thresholds_per_fold_"): - metadata["feature-thresholds-per-fold"] = ( - self.feature_thresholds_per_fold_.to_dict() - ) + metadata[ + "feature-thresholds-per-fold" + ] = self.feature_thresholds_per_fold_.to_dict() if hasattr(self, "aggregate_thresholds_per_fold_"): - metadata["aggregate-thresholds-per-fold"] = ( - self.aggregate_thresholds_per_fold_ - ) + metadata[ + "aggregate-thresholds-per-fold" + ] = self.aggregate_thresholds_per_fold_ # Window threshold metadata if hasattr(self, "window"): metadata["window"] = self.window @@ -111,9 +111,9 @@ def get_metadata(self): hasattr(self, "smooth_feature_thresholds_") and self.smooth_aggregate_threshold_ is not None ): - metadata["smooth-feature-thresholds"] = ( - self.smooth_feature_thresholds_.tolist() - ) + metadata[ + "smooth-feature-thresholds" + ] = self.smooth_feature_thresholds_.tolist() if ( hasattr(self, "smooth_aggregate_threshold_") and self.smooth_aggregate_threshold_ is not None @@ -121,13 +121,13 @@ def get_metadata(self): metadata["smooth-aggregate-threshold"] = self.smooth_aggregate_threshold_ if hasattr(self, "smooth_feature_thresholds_per_fold_"): - metadata["smooth-feature-thresholds-per-fold"] = ( - self.smooth_feature_thresholds_per_fold_.to_dict() - ) + metadata[ + "smooth-feature-thresholds-per-fold" + ] = self.smooth_feature_thresholds_per_fold_.to_dict() if hasattr(self, "smooth_aggregate_thresholds_per_fold_"): - metadata["smooth-aggregate-thresholds-per-fold"] = ( - self.smooth_aggregate_thresholds_per_fold_ - ) + metadata[ + "smooth-aggregate-thresholds-per-fold" + ] = self.smooth_aggregate_thresholds_per_fold_ if isinstance(self.base_estimator, GordoBase): metadata.update(self.base_estimator.get_metadata()) @@ -241,9 +241,9 @@ def cross_validate( smooth_aggregate_threshold_fold = ( scaled_mse.rolling(self.window).min().max() ) - self.smooth_aggregate_thresholds_per_fold_[f"fold-{i}"] = ( - smooth_aggregate_threshold_fold - ) + self.smooth_aggregate_thresholds_per_fold_[ + 
f"fold-{i}" + ] = smooth_aggregate_threshold_fold smooth_tag_thresholds_fold = mae.rolling(self.window).min().max() smooth_tag_thresholds_fold.name = f"fold-{i}" diff --git a/gordo/util/version.py b/gordo/util/version.py index 60b11157d..a43d3a5dd 100644 --- a/gordo/util/version.py +++ b/gordo/util/version.py @@ -8,7 +8,8 @@ class Version(metaclass=ABCMeta): @abstractmethod - def get_version(self): ... + def get_version(self): + ... class Special(Enum): diff --git a/gordo/workflow/config_elements/normalized_config.py b/gordo/workflow/config_elements/normalized_config.py index 42377d378..6c5a76b47 100644 --- a/gordo/workflow/config_elements/normalized_config.py +++ b/gordo/workflow/config_elements/normalized_config.py @@ -119,9 +119,11 @@ def __init__( if gordo_version is None: gordo_version = __version__ default_globals = self.get_default_globals(gordo_version) - default_globals["runtime"]["influx"]["resources"] = ( # type: ignore - _calculate_influx_resources(len(config["machines"])) # type: ignore - ) + default_globals["runtime"]["influx"][ + "resources" + ] = _calculate_influx_resources( # type: ignore + len(config["machines"]) + ) # type: ignore passed_globals = load_globals_config( config.get("globals", dict()), join_json_paths("globals", json_path) From e94c9f66ae1912282d8629d26bf599c8429fbabc Mon Sep 17 00:00:00 2001 From: RollerKnobster Date: Mon, 8 Jul 2024 14:38:44 +0300 Subject: [PATCH 51/52] Formatting --- gordo/machine/model/anomaly/diff.py | 36 ++++++++++++++--------------- gordo/serializer/from_definition.py | 6 ++--- gordo/util/version.py | 3 +-- 3 files changed, 22 insertions(+), 23 deletions(-) diff --git a/gordo/machine/model/anomaly/diff.py b/gordo/machine/model/anomaly/diff.py index 051610d21..e3b1f6f5d 100644 --- a/gordo/machine/model/anomaly/diff.py +++ b/gordo/machine/model/anomaly/diff.py @@ -95,13 +95,13 @@ def get_metadata(self): if hasattr(self, "aggregate_threshold_"): metadata["aggregate-threshold"] = self.aggregate_threshold_ if 
hasattr(self, "feature_thresholds_per_fold_"): - metadata[ - "feature-thresholds-per-fold" - ] = self.feature_thresholds_per_fold_.to_dict() + metadata["feature-thresholds-per-fold"] = ( + self.feature_thresholds_per_fold_.to_dict() + ) if hasattr(self, "aggregate_thresholds_per_fold_"): - metadata[ - "aggregate-thresholds-per-fold" - ] = self.aggregate_thresholds_per_fold_ + metadata["aggregate-thresholds-per-fold"] = ( + self.aggregate_thresholds_per_fold_ + ) # Window threshold metadata if hasattr(self, "window"): metadata["window"] = self.window @@ -111,9 +111,9 @@ def get_metadata(self): hasattr(self, "smooth_feature_thresholds_") and self.smooth_aggregate_threshold_ is not None ): - metadata[ - "smooth-feature-thresholds" - ] = self.smooth_feature_thresholds_.tolist() + metadata["smooth-feature-thresholds"] = ( + self.smooth_feature_thresholds_.tolist() + ) if ( hasattr(self, "smooth_aggregate_threshold_") and self.smooth_aggregate_threshold_ is not None @@ -121,13 +121,13 @@ def get_metadata(self): metadata["smooth-aggregate-threshold"] = self.smooth_aggregate_threshold_ if hasattr(self, "smooth_feature_thresholds_per_fold_"): - metadata[ - "smooth-feature-thresholds-per-fold" - ] = self.smooth_feature_thresholds_per_fold_.to_dict() + metadata["smooth-feature-thresholds-per-fold"] = ( + self.smooth_feature_thresholds_per_fold_.to_dict() + ) if hasattr(self, "smooth_aggregate_thresholds_per_fold_"): - metadata[ - "smooth-aggregate-thresholds-per-fold" - ] = self.smooth_aggregate_thresholds_per_fold_ + metadata["smooth-aggregate-thresholds-per-fold"] = ( + self.smooth_aggregate_thresholds_per_fold_ + ) if isinstance(self.base_estimator, GordoBase): metadata.update(self.base_estimator.get_metadata()) @@ -241,9 +241,9 @@ def cross_validate( smooth_aggregate_threshold_fold = ( scaled_mse.rolling(self.window).min().max() ) - self.smooth_aggregate_thresholds_per_fold_[ - f"fold-{i}" - ] = smooth_aggregate_threshold_fold + 
self.smooth_aggregate_thresholds_per_fold_[f"fold-{i}"] = ( + smooth_aggregate_threshold_fold + ) smooth_tag_thresholds_fold = mae.rolling(self.window).min().max() smooth_tag_thresholds_fold.name = f"fold-{i}" diff --git a/gordo/serializer/from_definition.py b/gordo/serializer/from_definition.py index e12291c0a..9bff1b7fa 100644 --- a/gordo/serializer/from_definition.py +++ b/gordo/serializer/from_definition.py @@ -176,9 +176,9 @@ def _build_step( import_str = list(step.keys())[0] try: - StepClass: Union[ - None, FeatureUnion, Pipeline, BaseEstimator - ] = import_location(import_str) + StepClass: Union[None, FeatureUnion, Pipeline, BaseEstimator] = ( + import_location(import_str) + ) except (ImportError, ValueError): StepClass = None diff --git a/gordo/util/version.py b/gordo/util/version.py index a43d3a5dd..60b11157d 100644 --- a/gordo/util/version.py +++ b/gordo/util/version.py @@ -8,8 +8,7 @@ class Version(metaclass=ABCMeta): @abstractmethod - def get_version(self): - ... + def get_version(self): ... 
class Special(Enum): From 1fd82d5c24de9ac2db31b135603e8c601b4f442a Mon Sep 17 00:00:00 2001 From: RollerKnobster Date: Mon, 8 Jul 2024 21:03:12 +0300 Subject: [PATCH 52/52] Add gunicorn as base requirement --- requirements/full_requirements.txt | 4 +++- requirements/requirements.in | 1 + 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/requirements/full_requirements.txt b/requirements/full_requirements.txt index 5cc78ccd7..642d64ee0 100644 --- a/requirements/full_requirements.txt +++ b/requirements/full_requirements.txt @@ -150,7 +150,9 @@ grpcio==1.64.1 # tensorboard # tensorflow gunicorn==22.0.0 - # via mlflow + # via + # -r requirements.in + # mlflow h5py==3.11.0 # via # keras diff --git a/requirements/requirements.in b/requirements/requirements.in index 3c8358c24..4945d87cb 100644 --- a/requirements/requirements.in +++ b/requirements/requirements.in @@ -4,6 +4,7 @@ jinja2~=3.1 python-dateutil~=2.8 tensorflow~=2.16.0 scikeras~=0.13.0 +gunicorn~=22.0 # There's a bug in keras 3.4.0 with loading models (https://github.com/keras-team/keras/issues/19921) keras<3.4.0 Flask>=2.2.5,<3.0.0