Skip to content

Commit

Permalink
refactoring packages files, gitignore update, adding example config, …
Browse files Browse the repository at this point in the history
…readme update
  • Loading branch information
kennethjmyers committed Apr 15, 2024
1 parent d50e61b commit 8b9a996
Show file tree
Hide file tree
Showing 7 changed files with 202 additions and 47 deletions.
52 changes: 52 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -207,6 +207,58 @@ dmypy.json
# Pyre type checker
.pyre/

# Local .terraform directories
**/.terraform/*

# .tfstate files
*.tfstate
*.tfstate.*

# plan files
*-plan.out

# Crash log files
crash.log
crash.*.log

# Exclude all .tfvars files, which are likely to contain sensitive data, such as
# password, private keys, and other secrets. These should not be part of version
# control as they are data points which are potentially sensitive and subject
# to change depending on the environment.
# NOTE: this repo does not put secrets in .tfvars files, so they are intentionally
# kept under version control and the pattern below is left commented out
# *.tfvars
*.tfvars.json

# Ignore override files as they are usually used to override resources locally and so
# are not checked in
override.tf
override.tf.json
*_override.tf
*_override.tf.json

# Include override files you do wish to add to version control using negated pattern
# !example_override.tf

# Include tfplan files to ignore the plan output of command: terraform plan -out=tfplan
# example: *tfplan*

# Ignore CLI configuration files
.terraformrc
terraform.rc

.vscode/*
!.vscode/settings.json
!.vscode/tasks.json
!.vscode/launch.json
!.vscode/extensions.json
!.vscode/*.code-snippets

# Local History for Visual Studio Code
.history/

# Built Visual Studio Code Extensions
*.vsix

#######################
# Unique to this repo #
#######################
Expand Down
1 change: 0 additions & 1 deletion .tool-versions

This file was deleted.

28 changes: 25 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
![Python](https://img.shields.io/badge/python-3.8.15-blue.svg)

# Viral Reddit Posts Model

The purpose of this repo is to:
Expand All @@ -14,10 +16,30 @@ The purpose of this repo is to:
1. Install Terraform CLI
2. Install AWS CLI and run `aws configure` and enter in your aws credentials.
3. Clone this repository
4. From within this repository run the following:
4. You can run the tests locally yourself by doing the following (it is recommended that you manage your python environments with something like [asdf](https://asdf-vm.com/) and use python==3.8.15 as your local runtime):

```sh
python -m venv venv # this sets up a local virtual env using the current python runtime
source ./venv/bin/activate # activates the virtual env
pip install -e ."[dev]" # installs this package in the local env (editable mode) with its dev dependencies
pytest . -r f -s # -r f shows extra info for failures, -s disables capturing
```

5. From within this repository run the following:

```sh
terraform init
terraform apply
terraform workspace new dev # this should switch you to the dev workspace
terraform plan -var-file="dev.tfvars" -out=dev-plan.out
terraform apply -var-file="dev.tfvars" dev-plan.out
```

To deploy to prd instead, run the following:

```sh
terraform workspace new prd # or terraform workspace select prd if already created
terraform plan -var-file="prd.tfvars" -out=prd-plan.out
terraform apply -var-file="prd.tfvars" prd-plan.out
```
If you don't want to apply the changes to your AWS account, run only the `terraform plan` step and skip `terraform apply`.

On subsequent updates you don't need to `init` or make a new workspace again.
22 changes: 22 additions & 0 deletions example_reddit.cfg
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
# rename this file .reddit.cfg and place it in your home directory
[reddit_api]
CLIENTID: "XXXX"
CLIENTSECRET: "XXXX"
PASSWORD: "XXXX"
USERNAME: "XXXX"

[S3_access]
ACCESSKEY: "XXXX"
SECRETKEY: "XXXX"

[Discord]
BOTTOKEN: "XXXX"
MYSNOWFLAKEID: "XXXX"
CHANNELSNOWFLAKEID: [123456789,987654321]

[Postgres]
USERNAME: "XXXX"
PASSWORD: "XXXX"
HOST: "XXXX"
PORT: "XXXX"
DATABASE: "XXXX"
43 changes: 43 additions & 0 deletions old_setup.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
# #!/usr/bin/env python
# # good reference: https://github.com/boto/boto3/blob/develop/setup.py
# from setuptools import find_packages, setup
#
# # install_requires = [
# # "boto3==1.26.117",
# # "matplotlib==3.3.4",
# # "numpy==1.21.6", # required by pyspark
# # "pandas==1.0.5", # required by pyspark
# # "pg8000==1.29.4", # this was easier to pip install than psycopg2
# # "pyarrow==2.0.0",
# # "pyspark==3.3.0", # using this version because py37 deprecated in pyspark 3.4.0
# # "requests==2.31.0",
# # "scikit-learn==1.0.2",
# # "seaborn==0.11.2",
# # "shap==0.41.0",
# # "sqlalchemy==1.4.46", # originally tried 2.0.10, but this was incompatible with old versions of pandas https://stackoverflow.com/a/75282604/5034651,
# # "viral_reddit_posts_utils @ git+https://github.com/ViralRedditPosts/Utils.git@main",
# # ]
# #
# # test_requires = [
# # "moto[dynamodb,s3]==4.1.8",
# # "pytest==7.3.1",
# # "pytest-cov==4.0.0",
# # ]+install_requires
# #
# # build_requires = ["flake8", "black"] + test_requires
# # dev_requires = ["pre-commit==2.21.0"] + build_requires
#
# setup(name='viral_reddit_posts_model',
# version='0.0.1',
# description='Model for Viral Reddit Posts project',
# author='Kenneth Myers',
# url='https://github.com/ViralRedditPosts',
# packages=find_packages(exclude=['tests*']),
# python_requires=">= 3.7",
# # install_requires=install_requires,
# # extras_require={
# # 'test':test_requires,
# # 'build':build_requires,
# # 'dev':dev_requires
# # }
# )
60 changes: 60 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
[build-system]
requires = ["setuptools >= 61.0"]
build-backend = "setuptools.build_meta"

# see https://packaging.python.org/en/latest/guides/writing-pyproject-toml/
[project]
name = "Reddit-Model"

dynamic = ["version"]

dependencies = [
"boto3==1.26.117",
"matplotlib==3.3.4",
"numpy==1.21.6", # required by pyspark
"pandas==1.3", # 1.3 at least needed for M1 Mac, lower version required by pyspark
"pg8000==1.29.4", # this was easier to pip install than psycopg2
"pyarrow==15.0.2", # don't use low versions which pin lower versions of numpy that break on M1 Mac
"pyspark==3.4.0", # py37 is deprecated as of pyspark 3.4.0; that is acceptable here because this project requires python 3.8
"requests==2.31.0",
"scikit-learn==1.0.2",
"seaborn==0.11.2",
"shap==0.41.0",
"sqlalchemy==1.4.46", # originally tried 2.0.10, but this was incompatible with old versions of pandas https://stackoverflow.com/a/75282604/5034651,
"viral_reddit_posts_utils @ git+https://github.com/ViralRedditPosts/Utils.git@main",
]

requires-python = "== 3.8.15"

authors = [
{name = "Kenneth Myers", email = "[email protected]"},
]

description = "This project contains the model for scoring rising reddit posts on potential for going viral. It is intended to be built as a docker image and deployed on Amazon Fargate."

readme = "README.md"

[project.optional-dependencies]
test = [
"moto[dynamodb,s3]==4.1.8",
"pytest==7.3.1",
"pytest-cov==4.0.0",
"Reddit-Model"
]
build = [
"flake8",
"black",
"Reddit-Model[test]"
]
dev = [
"pre-commit==2.21.0",
"Reddit-Model[build]"
]

[tool.setuptools.packages.find]
where = ['model']
exclude = ['tests*']




43 changes: 0 additions & 43 deletions setup.py

This file was deleted.

0 comments on commit 8b9a996

Please sign in to comment.