From 3474b1a28db33949f2bc9d90cd0af6a8281c4ee7 Mon Sep 17 00:00:00 2001 From: Carl GENNETAIS Date: Thu, 21 Mar 2024 19:00:26 +0100 Subject: [PATCH] create project structure with cookiecutter --- .gitignore | 89 ++++++++++++++++++++ Makefile | 144 +++++++++++++++++++++++++++++++++ README.md | 55 +++++++++++++ models/.gitkeep | 0 notebooks/.gitkeep | 0 references/.gitkeep | 0 reports/.gitkeep | 0 reports/figures/.gitkeep | 0 requirements.txt | 10 +++ setup.py | 10 +++ src/__init__.py | 0 src/data/.gitkeep | 0 src/data/__init__.py | 0 src/data/make_dataset.py | 30 +++++++ src/features/.gitkeep | 0 src/features/__init__.py | 0 src/features/build_features.py | 0 src/models/.gitkeep | 0 src/models/__init__.py | 0 src/models/predict_model.py | 0 src/models/train_model.py | 0 src/visualization/.gitkeep | 0 src/visualization/__init__.py | 0 src/visualization/visualize.py | 0 test_environment.py | 25 ++++++ 25 files changed, 363 insertions(+) create mode 100644 .gitignore create mode 100644 Makefile create mode 100644 README.md create mode 100644 models/.gitkeep create mode 100644 notebooks/.gitkeep create mode 100644 references/.gitkeep create mode 100644 reports/.gitkeep create mode 100644 reports/figures/.gitkeep create mode 100644 requirements.txt create mode 100644 setup.py create mode 100644 src/__init__.py create mode 100644 src/data/.gitkeep create mode 100644 src/data/__init__.py create mode 100644 src/data/make_dataset.py create mode 100644 src/features/.gitkeep create mode 100644 src/features/__init__.py create mode 100644 src/features/build_features.py create mode 100644 src/models/.gitkeep create mode 100644 src/models/__init__.py create mode 100644 src/models/predict_model.py create mode 100644 src/models/train_model.py create mode 100644 src/visualization/.gitkeep create mode 100644 src/visualization/__init__.py create mode 100644 src/visualization/visualize.py create mode 100644 test_environment.py diff --git a/.gitignore b/.gitignore new file mode 100644 
index 0000000..d7c9832 --- /dev/null +++ b/.gitignore @@ -0,0 +1,89 @@ +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] + +# C extensions +*.so + +# Distribution / packaging +.Python +env/ +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +*.egg-info/ +.installed.cfg +*.egg + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. +*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover + +# Translations +*.mo +*.pot + +# Django stuff: +*.log + +# Sphinx documentation +docs/_build/ + +# PyBuilder +target/ + +# DotEnv configuration +.env + +# Database +*.db +*.rdb + +# Pycharm +.idea + +# VS Code +.vscode/ + +# Spyder +.spyproject/ + +# Jupyter NB Checkpoints +.ipynb_checkpoints/ + +# exclude data from source control by default +/data/ + +# Mac OS-specific storage files +.DS_Store + +# vim +*.swp +*.swo + +# Mypy cache +.mypy_cache/ diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..9526ad1 --- /dev/null +++ b/Makefile @@ -0,0 +1,144 @@ +.PHONY: clean data lint requirements sync_data_to_s3 sync_data_from_s3 + +################################################################################# +# GLOBALS # +################################################################################# + +PROJECT_DIR := $(shell dirname $(realpath $(lastword $(MAKEFILE_LIST)))) +BUCKET = [OPTIONAL] your-bucket-for-syncing-data (do not include 's3://') +PROFILE = default +PROJECT_NAME = p8_cloud +PYTHON_INTERPRETER = python3 + +ifeq (,$(shell which conda)) +HAS_CONDA=False +else +HAS_CONDA=True +endif + +################################################################################# +# COMMANDS # 
+################################################################################# + +## Install Python Dependencies +requirements: test_environment + $(PYTHON_INTERPRETER) -m pip install -U pip setuptools wheel + $(PYTHON_INTERPRETER) -m pip install -r requirements.txt + +## Make Dataset +data: requirements + $(PYTHON_INTERPRETER) src/data/make_dataset.py data/raw data/processed + +## Delete all compiled Python files +clean: + find . -type f -name "*.py[co]" -delete + find . -type d -name "__pycache__" -delete + +## Lint using flake8 +lint: + flake8 src + +## Upload Data to S3 +sync_data_to_s3: +ifeq (default,$(PROFILE)) + aws s3 sync data/ s3://$(BUCKET)/data/ +else + aws s3 sync data/ s3://$(BUCKET)/data/ --profile $(PROFILE) +endif + +## Download Data from S3 +sync_data_from_s3: +ifeq (default,$(PROFILE)) + aws s3 sync s3://$(BUCKET)/data/ data/ +else + aws s3 sync s3://$(BUCKET)/data/ data/ --profile $(PROFILE) +endif + +## Set up python interpreter environment +create_environment: +ifeq (True,$(HAS_CONDA)) + @echo ">>> Detected conda, creating conda environment." +ifeq (3,$(findstring 3,$(PYTHON_INTERPRETER))) + conda create --name $(PROJECT_NAME) python=3 +else + conda create --name $(PROJECT_NAME) python=2.7 +endif + @echo ">>> New conda env created. Activate with:\nsource activate $(PROJECT_NAME)" +else + $(PYTHON_INTERPRETER) -m pip install -q virtualenv virtualenvwrapper + @echo ">>> Installing virtualenvwrapper if not already installed.\nMake sure the following lines are in shell startup file\n\ + export WORKON_HOME=$$HOME/.virtualenvs\nexport PROJECT_HOME=$$HOME/Devel\nsource /usr/local/bin/virtualenvwrapper.sh\n" + @bash -c "source `which virtualenvwrapper.sh`;mkvirtualenv $(PROJECT_NAME) --python=$(PYTHON_INTERPRETER)" + @echo ">>> New virtualenv created. 
Activate with:\nworkon $(PROJECT_NAME)" +endif + +## Test python environment is setup correctly +test_environment: + $(PYTHON_INTERPRETER) test_environment.py + +################################################################################# +# PROJECT RULES # +################################################################################# + + + +################################################################################# +# Self Documenting Commands # +################################################################################# + +.DEFAULT_GOAL := help + +# Inspired by +# sed script explained: +# /^##/: +# * save line in hold space +# * purge line +# * Loop: +# * append newline + line to hold space +# * go to next line +# * if line starts with doc comment, strip comment character off and loop +# * remove target prerequisites +# * append hold space (+ newline) to line +# * replace newline plus comments by `---` +# * print line +# Separate expressions are necessary because labels cannot be delimited by +# semicolon; see +.PHONY: help +help: + @echo "$$(tput bold)Available rules:$$(tput sgr0)" + @echo + @sed -n -e "/^## / { \ + h; \ + s/.*//; \ + :doc" \ + -e "H; \ + n; \ + s/^## //; \ + t doc" \ + -e "s/:.*//; \ + G; \ + s/\\n## /---/; \ + s/\\n/ /g; \ + p; \ + }" ${MAKEFILE_LIST} \ + | LC_ALL='C' sort --ignore-case \ + | awk -F '---' \ + -v ncol=$$(tput cols) \ + -v indent=19 \ + -v col_on="$$(tput setaf 6)" \ + -v col_off="$$(tput sgr0)" \ + '{ \ + printf "%s%*s%s ", col_on, -indent, $$1, col_off; \ + n = split($$2, words, " "); \ + line_length = ncol - indent; \ + for (i = 1; i <= n; i++) { \ + line_length -= length(words[i]) + 1; \ + if (line_length <= 0) { \ + line_length = ncol - indent - length(words[i]) - 1; \ + printf "\n%*s ", -indent, " "; \ + } \ + printf "%s ", words[i]; \ + } \ + printf "\n"; \ + }' \ + | more $(shell test $(shell uname) = Darwin && echo '--no-init --raw-control-chars') diff --git a/README.md b/README.md new file mode 
100644 index 0000000..0e8b46a --- /dev/null +++ b/README.md @@ -0,0 +1,55 @@ +P8_cloud +============================== + +OpenClassrooms Projet 8 : Déployer un modèle dans le cloud + +Project Organization +------------ + + ├── LICENSE + ├── Makefile <- Makefile with commands like `make data` or `make train` + ├── README.md <- The top-level README for developers using this project. + ├── data + │   ├── external <- Data from third party sources. + │   ├── interim <- Intermediate data that has been transformed. + │   ├── processed <- The final, canonical data sets for modeling. + │   └── raw <- The original, immutable data dump. + │ + ├── models <- Trained and serialized models, model predictions, or model summaries + │ + ├── notebooks <- Jupyter notebooks. Naming convention is a number (for ordering), + │ the creator's initials, and a short `-` delimited description, e.g. + │ `1.0-jqp-initial-data-exploration`. + │ + ├── references <- Data dictionaries, manuals, and all other explanatory materials. + │ + ├── reports <- Generated analysis as HTML, PDF, LaTeX, etc. + │   └── figures <- Generated graphics and figures to be used in reporting + │ + ├── requirements.txt <- The requirements file for reproducing the analysis environment, e.g. + │ generated with `pip freeze > requirements.txt` + │ + ├── setup.py <- makes project pip installable (pip install -e .) so src can be imported + ├── src <- Source code for use in this project. 
+ │   ├── __init__.py <- Makes src a Python module + │ │ + │   ├── data <- Scripts to download or generate data + │   │   └── make_dataset.py + │ │ + │   ├── features <- Scripts to turn raw data into features for modeling + │   │   └── build_features.py + │ │ + │   ├── models <- Scripts to train models and then use trained models to make + │ │ │ predictions + │   │   ├── predict_model.py + │   │   └── train_model.py + │ │ + │   └── visualization <- Scripts to create exploratory and results oriented visualizations + │   └── visualize.py + │ + └── tox.ini <- tox file with settings for running tox; see tox.readthedocs.io + + +-------- + +

Project based on the cookiecutter data science project template. #cookiecutterdatascience

diff --git a/models/.gitkeep b/models/.gitkeep new file mode 100644 index 0000000..e69de29 diff --git a/notebooks/.gitkeep b/notebooks/.gitkeep new file mode 100644 index 0000000..e69de29 diff --git a/references/.gitkeep b/references/.gitkeep new file mode 100644 index 0000000..e69de29 diff --git a/reports/.gitkeep b/reports/.gitkeep new file mode 100644 index 0000000..e69de29 diff --git a/reports/figures/.gitkeep b/reports/figures/.gitkeep new file mode 100644 index 0000000..e69de29 diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..73d5b57 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,10 @@ +# local package +-e . + +# external requirements +click +# Sphinx +coverage +# awscli +flake8 +python-dotenv>=0.5.1 diff --git a/setup.py b/setup.py new file mode 100644 index 0000000..9157905 --- /dev/null +++ b/setup.py @@ -0,0 +1,10 @@ +from setuptools import find_packages, setup + +setup( + name='src', + packages=find_packages(), + version='0.1.0', + description='OpenClassrooms Projet 8 : Déployer un modèle dans le cloud', + author='carl', + license='', +) diff --git a/src/__init__.py b/src/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/src/data/.gitkeep b/src/data/.gitkeep new file mode 100644 index 0000000..e69de29 diff --git a/src/data/__init__.py b/src/data/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/src/data/make_dataset.py b/src/data/make_dataset.py new file mode 100644 index 0000000..96b377a --- /dev/null +++ b/src/data/make_dataset.py @@ -0,0 +1,30 @@ +# -*- coding: utf-8 -*- +import click +import logging +from pathlib import Path +from dotenv import find_dotenv, load_dotenv + + +@click.command() +@click.argument('input_filepath', type=click.Path(exists=True)) +@click.argument('output_filepath', type=click.Path()) +def main(input_filepath, output_filepath): + """ Runs data processing scripts to turn raw data from (../raw) into + cleaned data ready to be analyzed (saved in 
../processed). + """ + logger = logging.getLogger(__name__) + logger.info('making final data set from raw data') + + +if __name__ == '__main__': + log_fmt = '%(asctime)s - %(name)s - %(levelname)s - %(message)s' + logging.basicConfig(level=logging.INFO, format=log_fmt) + + # not used in this stub but often useful for finding various files + project_dir = Path(__file__).resolve().parents[2] + + # find .env automagically by walking up directories until it's found, then + # load up the .env entries as environment variables + load_dotenv(find_dotenv()) + + main() diff --git a/src/features/.gitkeep b/src/features/.gitkeep new file mode 100644 index 0000000..e69de29 diff --git a/src/features/__init__.py b/src/features/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/src/features/build_features.py b/src/features/build_features.py new file mode 100644 index 0000000..e69de29 diff --git a/src/models/.gitkeep b/src/models/.gitkeep new file mode 100644 index 0000000..e69de29 diff --git a/src/models/__init__.py b/src/models/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/src/models/predict_model.py b/src/models/predict_model.py new file mode 100644 index 0000000..e69de29 diff --git a/src/models/train_model.py b/src/models/train_model.py new file mode 100644 index 0000000..e69de29 diff --git a/src/visualization/.gitkeep b/src/visualization/.gitkeep new file mode 100644 index 0000000..e69de29 diff --git a/src/visualization/__init__.py b/src/visualization/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/src/visualization/visualize.py b/src/visualization/visualize.py new file mode 100644 index 0000000..e69de29 diff --git a/test_environment.py b/test_environment.py new file mode 100644 index 0000000..d0ac4a7 --- /dev/null +++ b/test_environment.py @@ -0,0 +1,25 @@ +import sys + +REQUIRED_PYTHON = "python3" + + +def main(): + system_major = sys.version_info.major + if REQUIRED_PYTHON == "python": + required_major = 2 + elif 
REQUIRED_PYTHON == "python3": + required_major = 3 + else: + raise ValueError("Unrecognized python interpreter: {}".format( + REQUIRED_PYTHON)) + + if system_major != required_major: + raise TypeError( + "This project requires Python {}. Found: Python {}".format( + required_major, sys.version)) + else: + print(">>> Development environment passes all tests!") + + +if __name__ == '__main__': + main()