diff --git a/.gitignore b/.gitignore index 7e9b39a..5f15c28 100644 --- a/.gitignore +++ b/.gitignore @@ -207,6 +207,58 @@ dmypy.json # Pyre type checker .pyre/ +# Local .terraform directories +**/.terraform/* + +# .tfstate files +*.tfstate +*.tfstate.* + +# plan files +*-plan.out + +# Crash log files +crash.log +crash.*.log + +# Exclude all .tfvars files, which are likely to contain sensitive data, such as +# password, private keys, and other secrets. These should not be part of version +# control as they are data points which are potentially sensitive and subject +# to change depending on the environment. +# NOTE: we do not put secrets in .tfvars files, so the *.tfvars rule below is intentionally disabled and those files are tracked +# *.tfvars +*.tfvars.json + +# Ignore override files as they are usually used to override resources locally and so +# are not checked in +override.tf +override.tf.json +*_override.tf +*_override.tf.json + +# Include override files you do wish to add to version control using negated pattern +# !example_override.tf + +# Include tfplan files to ignore the plan output of command: terraform plan -out=tfplan +# example: *tfplan* + +# Ignore CLI configuration files +.terraformrc +terraform.rc + +.vscode/* +!.vscode/settings.json +!.vscode/tasks.json +!.vscode/launch.json +!.vscode/extensions.json +!.vscode/*.code-snippets + +# Local History for Visual Studio Code +.history/ + +# Built Visual Studio Code Extensions +*.vsix + ####################### # Unique to this repo # ####################### diff --git a/.tool-versions b/.tool-versions deleted file mode 100644 index 05cb24f..0000000 --- a/.tool-versions +++ /dev/null @@ -1 +0,0 @@ -python 3.8.15 diff --git a/README.md b/README.md index c8fb3de..4e498b6 100644 --- a/README.md +++ b/README.md @@ -1,3 +1,5 @@ +![Python](https://img.shields.io/badge/python-3.8.15-blue.svg) + # Viral Reddit Posts Model The purpose of this repo is to: @@ -14,10 +16,30 @@ The purpose of this repo is to: 1. Install Terraform CLI 2. 
Install AWS CLI and run `aws configure` and enter in your aws credentials. 3. Clone this repository -4. From within this repository run the following: +4. You can run the tests locally yourself by doing the following (it is recommended that you manage your python environments with something like [asdf](https://asdf-vm.com/) and use python==3.8.15 as your local runtime, matching `requires-python` in `pyproject.toml`): + + ```sh + python -m venv venv # this sets up a local virtual env using the current python runtime + source ./venv/bin/activate # activates the virtual env + pip install -e ."[dev]" # installs this package in the local env with its dependencies + pytest . -r f -s # -r f shows extra info for failures, -s disables capturing + ``` + +5. From within this repository run the following: ```sh terraform init - terraform apply + terraform workspace new dev # this should switch you to the dev workspace + terraform plan -var-file="dev.tfvars" -out=dev-plan.out + terraform apply -var-file="dev.tfvars" dev-plan.out + ``` + + For deploying to prd: + + ```sh + terraform workspace new prd # or terraform workspace select prd if already created + terraform plan -var-file="prd.tfvars" -out=prd-plan.out + terraform apply -var-file="prd.tfvars" prd-plan.out ``` - If you don't want to apply the changes to your aws account you can instead run `terraform plan`. \ No newline at end of file + + On subsequent updates you don't need to `init` or make a new workspace again. 
\ No newline at end of file diff --git a/example_reddit.cfg b/example_reddit.cfg new file mode 100644 index 0000000..47eb3ce --- /dev/null +++ b/example_reddit.cfg @@ -0,0 +1,22 @@ +# rename this file .reddit.cfg and place it in your home directory +[reddit_api] +CLIENTID: "XXXX" +CLIENTSECRET: "XXXX" +PASSWORD: "XXXX" +USERNAME: "XXXX" + +[S3_access] +ACCESSKEY: "XXXX" +SECRETKEY: "XXXX" + +[Discord] +BOTTOKEN: "XXXX" +MYSNOWFLAKEID: "XXXX" +CHANNELSNOWFLAKEID: [123456789,987654321] + +[Postgres] +USERNAME: "XXXX" +PASSWORD: "XXXX" +HOST: "XXXX" +PORT: "XXXX" +DATABASE: "XXXX" diff --git a/old_setup.py b/old_setup.py new file mode 100644 index 0000000..6d0f2c0 --- /dev/null +++ b/old_setup.py @@ -0,0 +1,43 @@ +# #!/usr/bin/env python +# # good reference: https://github.com/boto/boto3/blob/develop/setup.py +# from setuptools import find_packages, setup +# +# # install_requires = [ +# # "boto3==1.26.117", +# # "matplotlib==3.3.4", +# # "numpy==1.21.6", # required by pyspark +# # "pandas==1.0.5", # required by pyspark +# # "pg8000==1.29.4", # this was easier to pip install than psycopg2 +# # "pyarrow==2.0.0", +# # "pyspark==3.3.0", # using this version because py37 deprecated in pyspark 3.4.0 +# # "requests==2.31.0", +# # "scikit-learn==1.0.2", +# # "seaborn==0.11.2", +# # "shap==0.41.0", +# # "sqlalchemy==1.4.46", # originally tried 2.0.10, but this was incompatible with old versions of pandas https://stackoverflow.com/a/75282604/5034651, +# # "viral_reddit_posts_utils @ git+https://github.com/ViralRedditPosts/Utils.git@main", +# # ] +# # +# # test_requires = [ +# # "moto[dynamodb,s3]==4.1.8", +# # "pytest==7.3.1", +# # "pytest-cov==4.0.0", +# # ]+install_requires +# # +# # build_requires = ["flake8", "black"] + test_requires +# # dev_requires = ["pre-commit==2.21.0"] + build_requires +# +# setup(name='viral_reddit_posts_model', +# version='0.0.1', +# description='Model for Viral Reddit Posts project', +# author='Kenneth Myers', +# 
url='https://github.com/ViralRedditPosts', +# packages=find_packages(exclude=['tests*']), +# python_requires=">= 3.7", +# # install_requires=install_requires, +# # extras_require={ +# # 'test':test_requires, +# # 'build':build_requires, +# # 'dev':dev_requires +# # } +# ) \ No newline at end of file diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..f298fc4 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,60 @@ +[build-system] +requires = ["setuptools >= 61.0"] +build-backend = "setuptools.build_meta" + +# see https://packaging.python.org/en/latest/guides/writing-pyproject-toml/ +[project] +name = "Reddit-Model" + +dynamic = ["version"] + +dependencies = [ + "boto3==1.26.117", + "matplotlib==3.3.4", + "numpy==1.21.6", # required by pyspark + "pandas==1.3", # 1.3 at least needed for M1 Mac, lower version required by pyspark + "pg8000==1.29.4", # this was easier to pip install than psycopg2 + "pyarrow==15.0.2", # don't use low versions which pin lower versions of numpy that break on M1 Mac + "pyspark==3.4.0", # py37 is deprecated as of pyspark 3.4.0; that no longer blocks us since requires-python is 3.8.15 + "requests==2.31.0", + "scikit-learn==1.0.2", + "seaborn==0.11.2", + "shap==0.41.0", + "sqlalchemy==1.4.46", # originally tried 2.0.10, but this was incompatible with old versions of pandas https://stackoverflow.com/a/75282604/5034651, + "viral_reddit_posts_utils @ git+https://github.com/ViralRedditPosts/Utils.git@main", +] + +requires-python = "== 3.8.15" + +authors = [ + {name = "Kenneth Myers", email = "myers.kenneth.james@gmail.com"}, +] + +description = "This project contains the model for scoring rising reddit posts on potential for going viral. It is intended to be built as a docker image and deployed on Amazon Fargate." 
+ +readme = "README.md" + +[project.optional-dependencies] +test = [ + "moto[dynamodb,s3]==4.1.8", + "pytest==7.3.1", + "pytest-cov==4.0.0", + "Reddit-Model" +] +build = [ + "flake8", + "black", + "Reddit-Model[test]" +] +dev = [ + "pre-commit==2.21.0", + "Reddit-Model[build]" +] + +[tool.setuptools.packages.find] +where = ['model'] +exclude = ['tests*'] + + + + diff --git a/setup.py b/setup.py deleted file mode 100644 index c308719..0000000 --- a/setup.py +++ /dev/null @@ -1,43 +0,0 @@ -#!/usr/bin/env python -# good reference: https://github.com/boto/boto3/blob/develop/setup.py -from setuptools import find_packages, setup - -install_requires = [ - "boto3==1.26.117", - "matplotlib==3.3.4", - "numpy==1.21.6", # required by pyspark - "pandas==1.0.5", # required by pyspark - "pg8000==1.29.4", # this was easier to pip install than psycopg2 - "pyarrow==2.0.0", - "pyspark==3.3.0", # using this version because py37 deprecated in pyspark 3.4.0 - "requests==2.31.0", - "scikit-learn==1.0.2", - "seaborn==0.11.2", - "shap==0.41.0", - "sqlalchemy==1.4.46", # originally tried 2.0.10, but this was incompatible with old versions of pandas https://stackoverflow.com/a/75282604/5034651, - "viral_reddit_posts_utils @ git+https://github.com/ViralRedditPosts/Utils.git@main", -] - -test_requires = [ - "moto[dynamodb,s3]==4.1.8", - "pytest==7.3.1", - "pytest-cov==4.0.0", -]+install_requires - -build_requires = ["flake8", "black"] + test_requires -dev_requires = ["pre-commit==2.21.0"] + build_requires - -setup(name='viral_reddit_posts_model', - version='0.0.1', - description='Model for Viral Reddit Posts project', - author='Kenneth Myers', - url='https://github.com/ViralRedditPosts', - packages=find_packages(exclude=['tests*']), - python_requires=">= 3.7", - install_requires=install_requires, - extras_require={ - 'test':test_requires, - 'build':build_requires, - 'dev':dev_requires - } - ) \ No newline at end of file