Skip to content

Commit

Permalink
create project strucutre with cookiecutter
Browse files Browse the repository at this point in the history
  • Loading branch information
Carl GENNETAIS committed Mar 21, 2024
0 parents commit 3474b1a
Show file tree
Hide file tree
Showing 25 changed files with 363 additions and 0 deletions.
89 changes: 89 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]

# C extensions
*.so

# Distribution / packaging
.Python
env/
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
*.egg-info/
.installed.cfg
*.egg

# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover

# Translations
*.mo
*.pot

# Django stuff:
*.log

# Sphinx documentation
docs/_build/

# PyBuilder
target/

# DotEnv configuration
.env

# Database
*.db
*.rdb

# Pycharm
.idea

# VS Code
.vscode/

# Spyder
.spyproject/

# Jupyter NB Checkpoints
.ipynb_checkpoints/

# exclude data from source control by default
/data/

# Mac OS-specific storage files
.DS_Store

# vim
*.swp
*.swo

# Mypy cache
.mypy_cache/
144 changes: 144 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,144 @@
.PHONY: clean data lint requirements sync_data_to_s3 sync_data_from_s3

#################################################################################
# GLOBALS #
#################################################################################

PROJECT_DIR := $(shell dirname $(realpath $(lastword $(MAKEFILE_LIST))))
BUCKET = [OPTIONAL] your-bucket-for-syncing-data (do not include 's3://')
PROFILE = default
PROJECT_NAME = p8_cloud
PYTHON_INTERPRETER = python3

ifeq (,$(shell which conda))
HAS_CONDA=False
else
HAS_CONDA=True
endif

#################################################################################
# COMMANDS #
#################################################################################

## Install Python Dependencies
requirements: test_environment
$(PYTHON_INTERPRETER) -m pip install -U pip setuptools wheel
$(PYTHON_INTERPRETER) -m pip install -r requirements.txt

## Make Dataset
data: requirements
$(PYTHON_INTERPRETER) src/data/make_dataset.py data/raw data/processed

## Delete all compiled Python files
clean:
find . -type f -name "*.py[co]" -delete
find . -type d -name "__pycache__" -delete

## Lint using flake8
lint:
flake8 src

## Upload Data to S3
sync_data_to_s3:
ifeq (default,$(PROFILE))
aws s3 sync data/ s3://$(BUCKET)/data/
else
aws s3 sync data/ s3://$(BUCKET)/data/ --profile $(PROFILE)
endif

## Download Data from S3
sync_data_from_s3:
ifeq (default,$(PROFILE))
aws s3 sync s3://$(BUCKET)/data/ data/
else
aws s3 sync s3://$(BUCKET)/data/ data/ --profile $(PROFILE)
endif

## Set up python interpreter environment
create_environment:
ifeq (True,$(HAS_CONDA))
@echo ">>> Detected conda, creating conda environment."
ifeq (3,$(findstring 3,$(PYTHON_INTERPRETER)))
conda create --name $(PROJECT_NAME) python=3
else
conda create --name $(PROJECT_NAME) python=2.7
endif
@echo ">>> New conda env created. Activate with:\nsource activate $(PROJECT_NAME)"
else
$(PYTHON_INTERPRETER) -m pip install -q virtualenv virtualenvwrapper
@echo ">>> Installing virtualenvwrapper if not already installed.\nMake sure the following lines are in shell startup file\n\
export WORKON_HOME=$$HOME/.virtualenvs\nexport PROJECT_HOME=$$HOME/Devel\nsource /usr/local/bin/virtualenvwrapper.sh\n"
@bash -c "source `which virtualenvwrapper.sh`;mkvirtualenv $(PROJECT_NAME) --python=$(PYTHON_INTERPRETER)"
@echo ">>> New virtualenv created. Activate with:\nworkon $(PROJECT_NAME)"
endif

## Test python environment is setup correctly
test_environment:
$(PYTHON_INTERPRETER) test_environment.py

#################################################################################
# PROJECT RULES #
#################################################################################



#################################################################################
# Self Documenting Commands #
#################################################################################

.DEFAULT_GOAL := help

# Inspired by <http://marmelab.com/blog/2016/02/29/auto-documented-makefile.html>
# sed script explained:
# /^##/:
# * save line in hold space
# * purge line
# * Loop:
# * append newline + line to hold space
# * go to next line
# * if line starts with doc comment, strip comment character off and loop
# * remove target prerequisites
# * append hold space (+ newline) to line
# * replace newline plus comments by `---`
# * print line
# Separate expressions are necessary because labels cannot be delimited by
# semicolon; see <http://stackoverflow.com/a/11799865/1968>
.PHONY: help
help:
@echo "$$(tput bold)Available rules:$$(tput sgr0)"
@echo
@sed -n -e "/^## / { \
h; \
s/.*//; \
:doc" \
-e "H; \
n; \
s/^## //; \
t doc" \
-e "s/:.*//; \
G; \
s/\\n## /---/; \
s/\\n/ /g; \
p; \
}" ${MAKEFILE_LIST} \
| LC_ALL='C' sort --ignore-case \
| awk -F '---' \
-v ncol=$$(tput cols) \
-v indent=19 \
-v col_on="$$(tput setaf 6)" \
-v col_off="$$(tput sgr0)" \
'{ \
printf "%s%*s%s ", col_on, -indent, $$1, col_off; \
n = split($$2, words, " "); \
line_length = ncol - indent; \
for (i = 1; i <= n; i++) { \
line_length -= length(words[i]) + 1; \
if (line_length <= 0) { \
line_length = ncol - indent - length(words[i]) - 1; \
printf "\n%*s ", -indent, " "; \
} \
printf "%s ", words[i]; \
} \
printf "\n"; \
}' \
| more $(shell test $(shell uname) = Darwin && echo '--no-init --raw-control-chars')
55 changes: 55 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
P8_cloud
==============================

OpenClassrooms Projet 8 : Déployer un modèle dans le cloud

Project Organization
------------

├── LICENSE
├── Makefile <- Makefile with commands like `make data` or `make train`
├── README.md <- The top-level README for developers using this project.
├── data
│   ├── external <- Data from third party sources.
│   ├── interim <- Intermediate data that has been transformed.
│   ├── processed <- The final, canonical data sets for modeling.
│   └── raw <- The original, immutable data dump.
├── models <- Trained and serialized models, model predictions, or model summaries
├── notebooks <- Jupyter notebooks. Naming convention is a number (for ordering),
│ the creator's initials, and a short `-` delimited description, e.g.
│ `1.0-jqp-initial-data-exploration`.
├── references <- Data dictionaries, manuals, and all other explanatory materials.
├── reports <- Generated analysis as HTML, PDF, LaTeX, etc.
│   └── figures <- Generated graphics and figures to be used in reporting
├── requirements.txt <- The requirements file for reproducing the analysis environment, e.g.
│ generated with `pip freeze > requirements.txt`
├── setup.py <- makes project pip installable (pip install -e .) so src can be imported
├── src <- Source code for use in this project.
│   ├── __init__.py <- Makes src a Python module
│ │
│   ├── data <- Scripts to download or generate data
│   │   └── make_dataset.py
│ │
│   ├── features <- Scripts to turn raw data into features for modeling
│   │   └── build_features.py
│ │
│   ├── models <- Scripts to train models and then use trained models to make
│ │ │ predictions
│   │   ├── predict_model.py
│   │   └── train_model.py
│ │
│   └── visualization <- Scripts to create exploratory and results oriented visualizations
│   └── visualize.py
└── tox.ini <- tox file with settings for running tox; see tox.readthedocs.io


--------

<p><small>Project based on the <a target="_blank" href="https://drivendata.github.io/cookiecutter-data-science/">cookiecutter data science project template</a>. #cookiecutterdatascience</small></p>
Empty file added models/.gitkeep
Empty file.
Empty file added notebooks/.gitkeep
Empty file.
Empty file added references/.gitkeep
Empty file.
Empty file added reports/.gitkeep
Empty file.
Empty file added reports/figures/.gitkeep
Empty file.
10 changes: 10 additions & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
# local package
-e .

# external requirements
click
# Sphinx
coverage
# awscli
flake8
python-dotenv>=0.5.1
10 changes: 10 additions & 0 deletions setup.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
from setuptools import find_packages, setup

setup(
name='src',
packages=find_packages(),
version='0.1.0',
description='OpenClassrooms Projet 8 : Déployer un modèle dans le cloud',
author='carl',
license='',
)
Empty file added src/__init__.py
Empty file.
Empty file added src/data/.gitkeep
Empty file.
Empty file added src/data/__init__.py
Empty file.
30 changes: 30 additions & 0 deletions src/data/make_dataset.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
# -*- coding: utf-8 -*-
import click
import logging
from pathlib import Path
from dotenv import find_dotenv, load_dotenv


@click.command()
@click.argument('input_filepath', type=click.Path(exists=True))
@click.argument('output_filepath', type=click.Path())
def main(input_filepath, output_filepath):
""" Runs data processing scripts to turn raw data from (../raw) into
cleaned data ready to be analyzed (saved in ../processed).
"""
logger = logging.getLogger(__name__)
logger.info('making final data set from raw data')


if __name__ == '__main__':
log_fmt = '%(asctime)s - %(name)s - %(levelname)s - %(message)s'
logging.basicConfig(level=logging.INFO, format=log_fmt)

# not used in this stub but often useful for finding various files
project_dir = Path(__file__).resolve().parents[2]

# find .env automagically by walking up directories until it's found, then
# load up the .env entries as environment variables
load_dotenv(find_dotenv())

main()
Empty file added src/features/.gitkeep
Empty file.
Empty file added src/features/__init__.py
Empty file.
Empty file added src/features/build_features.py
Empty file.
Empty file added src/models/.gitkeep
Empty file.
Empty file added src/models/__init__.py
Empty file.
Empty file added src/models/predict_model.py
Empty file.
Empty file added src/models/train_model.py
Empty file.
Empty file added src/visualization/.gitkeep
Empty file.
Empty file added src/visualization/__init__.py
Empty file.
Empty file added src/visualization/visualize.py
Empty file.
25 changes: 25 additions & 0 deletions test_environment.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
import sys

REQUIRED_PYTHON = "python3"


def main():
system_major = sys.version_info.major
if REQUIRED_PYTHON == "python":
required_major = 2
elif REQUIRED_PYTHON == "python3":
required_major = 3
else:
raise ValueError("Unrecognized python interpreter: {}".format(
REQUIRED_PYTHON))

if system_major != required_major:
raise TypeError(
"This project requires Python {}. Found: Python {}".format(
required_major, sys.version))
else:
print(">>> Development environment passes all tests!")


if __name__ == '__main__':
main()

0 comments on commit 3474b1a

Please sign in to comment.