Deep Learning addition commands
pm3310 committed Apr 24, 2018
1 parent bbb5205 commit be8f1bc
Showing 24 changed files with 530 additions and 38 deletions.
2 changes: 2 additions & 0 deletions .flake8
@@ -0,0 +1,2 @@
[flake8]
max-line-length=100
55 changes: 18 additions & 37 deletions .gitignore
@@ -1,7 +1,6 @@
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# C extensions
*.so
@@ -20,7 +19,6 @@ lib64/
parts/
sdist/
var/
wheels/
*.egg-info/
.installed.cfg
*.egg
@@ -43,59 +41,42 @@ htmlcov/
.cache
nosetests.xml
coverage.xml
*.cover
.hypothesis/
*,cover

# Translations
*.mo
*.pot

# Django stuff:
*.log
local_settings.py

# Flask stuff:
instance/
.webassets-cache

# Scrapy stuff:
.scrapy

# Sphinx documentation
docs/_build/

# PyBuilder
target/

# Jupyter Notebook
.ipynb_checkpoints

# pyenv
.python-version

# celery beat schedule file
celerybeat-schedule
# DotEnv configuration
.env

# SageMath parsed files
*.sage.py
# Database
*.db
*.rdb

# dotenv
.env
# Pycharm
.idea

# virtualenv
.venv
venv/
ENV/
# VS Code
.vscode/

# Spyder project settings
.spyderproject
.spyproject
# Spyder
.spyproject/

# Rope project settings
.ropeproject
# Jupyter NB Checkpoints
.ipynb_checkpoints/

# mkdocs documentation
/site
# exclude data from source control by default
/data/

# mypy
.mypy_cache/
# Mac OS-specific storage files
.DS_Store
114 changes: 114 additions & 0 deletions Makefile
@@ -0,0 +1,114 @@
.PHONY: clean data lint requirements train

#################################################################################
# GLOBALS #
#################################################################################

PROJECT_DIR := $(shell dirname $(realpath $(lastword $(MAKEFILE_LIST))))
PROFILE = default
PROJECT_NAME = deep-learning-addition
PYTHON_INTERPRETER = python3

#################################################################################
# COMMANDS #
#################################################################################

## Install Python Dependencies
requirements: test_environment
	pip install -r requirements.txt

## Make Dataset
data: requirements
	PYTHONPATH='.' $(PYTHON_INTERPRETER) src/data/make_dataset.py ./data/processed/

## Train Deep Learning Model
train: requirements
	PYTHONPATH='.' $(PYTHON_INTERPRETER) src/models/train_model.py ./data/processed/ ./models/

## Delete all compiled Python files
clean:
	find . -type f -name "*.py[co]" -delete
	find . -type d -name "__pycache__" -delete

## Lint using flake8
lint:
	flake8 src

## Set up python interpreter environment
create_environment:
	@pip install -q virtualenv virtualenvwrapper
	@echo ">>> Installing virtualenvwrapper if not already installed.\nMake sure the following lines are in shell startup file\n\
	export WORKON_HOME=$$HOME/.virtualenvs\nexport PROJECT_HOME=$$HOME/Devel\nsource /usr/local/bin/virtualenvwrapper.sh\n"
	@bash -c "source `which virtualenvwrapper.sh`;mkvirtualenv $(PROJECT_NAME) --python=$(PYTHON_INTERPRETER)"
	@echo ">>> New virtualenv created. Activate with:\nworkon $(PROJECT_NAME)"

## Test python environment is set up correctly
test_environment:
	$(PYTHON_INTERPRETER) test_environment.py

#################################################################################
# PROJECT RULES #
#################################################################################



#################################################################################
# Self Documenting Commands #
#################################################################################

.DEFAULT_GOAL := show-help

# Inspired by <http://marmelab.com/blog/2016/02/29/auto-documented-makefile.html>
# sed script explained:
# /^##/:
# * save line in hold space
# * purge line
# * Loop:
# * append newline + line to hold space
# * go to next line
# * if line starts with doc comment, strip comment character off and loop
# * remove target prerequisites
# * append hold space (+ newline) to line
# * replace newline plus comments by `---`
# * print line
# Separate expressions are necessary because labels cannot be delimited by
# semicolon; see <http://stackoverflow.com/a/11799865/1968>
.PHONY: show-help
show-help:
	@echo "$$(tput bold)Available rules:$$(tput sgr0)"
	@echo
	@sed -n -e "/^## / { \
		h; \
		s/.*//; \
		:doc" \
		-e "H; \
		n; \
		s/^## //; \
		t doc" \
		-e "s/:.*//; \
		G; \
		s/\\n## /---/; \
		s/\\n/ /g; \
		p; \
	}" ${MAKEFILE_LIST} \
	| LC_ALL='C' sort --ignore-case \
	| awk -F '---' \
		-v ncol=$$(tput cols) \
		-v indent=19 \
		-v col_on="$$(tput setaf 6)" \
		-v col_off="$$(tput sgr0)" \
	'{ \
		printf "%s%*s%s ", col_on, -indent, $$1, col_off; \
		n = split($$2, words, " "); \
		line_length = ncol - indent; \
		for (i = 1; i <= n; i++) { \
			line_length -= length(words[i]) + 1; \
			if (line_length <= 0) { \
				line_length = ncol - indent - length(words[i]) - 1; \
				printf "\n%*s ", -indent, " "; \
			} \
			printf "%s ", words[i]; \
		} \
		printf "\n"; \
	}' \
	| more $(shell test $(shell uname) = Darwin && echo '--no-init --raw-control-chars')
19 changes: 18 additions & 1 deletion README.md
@@ -1,2 +1,19 @@
# deep-learning-addition
An implementation of sequence to sequence learning for performing addition
An LSTM model that learns to perform arithmetic addition on integers: a simple Deep Learning calculator.

## Development

- Python 3.6 and 2.7
- For Python 2.7, set `REQUIRED_PYTHON` in `test_environment.py` and `PYTHON_INTERPRETER` in the `Makefile` to `python2`
- [awscli](https://pypi.python.org/pypi/awscli) installed and configured

## Commands
```
clean Delete all compiled Python files
create_environment Set up python interpreter environment
data Make Dataset
lint Lint using flake8
requirements Install Python Dependencies
test_environment Test python environment is set up correctly
train Train Deep Learning Model
```
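
A typical workflow, assuming the commands are run from the project root (the `data` and `train` targets write to `./data/processed/` and `./models/`, as defined in the `Makefile`):
```
make create_environment
workon deep-learning-addition
make requirements
make data
make train
```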
Empty file added models/.gitkeep
Empty file added notebooks/.gitkeep
Empty file added references/.gitkeep
Empty file added reports/.gitkeep
Empty file added reports/figures/.gitkeep
51 changes: 51 additions & 0 deletions requirements.txt
@@ -0,0 +1,51 @@
absl-py==0.1.13
alabaster==0.7.10
astor==0.6.2
awscli==1.15.4
Babel==2.5.3
bleach==1.5.0
botocore==1.10.4
certifi==2018.4.16
chardet==3.0.4
click==6.7
colorama==0.3.7
coverage==4.5.1
docutils==0.14
flake8==3.5.0
gast==0.2.0
grpcio==1.11.0
h5py==2.7.1
html5lib==0.9999999
idna==2.6
imagesize==1.0.0
Jinja2==2.10
jmespath==0.9.3
Keras==2.1.5
Markdown==2.6.11
MarkupSafe==1.0
mccabe==0.6.1
numpy==1.14.2
packaging==17.1
protobuf==3.5.2.post1
pyasn1==0.4.2
pycodestyle==2.3.1
pyflakes==1.6.0
Pygments==2.2.0
pyparsing==2.2.0
python-dateutil==2.6.1
python-dotenv==0.8.2
pytz==2018.4
PyYAML==3.12
requests==2.18.4
rsa==3.4.2
s3transfer==0.1.13
scipy==1.0.1
six==1.11.0
snowballstemmer==1.2.1
Sphinx==1.7.2
sphinxcontrib-websupport==1.0.1
tensorboard==1.7.0
tensorflow==1.7.0
termcolor==1.1.0
urllib3==1.22
Werkzeug==0.14.1
Empty file added src/__init__.py
36 changes: 36 additions & 0 deletions src/character_encoder.py
@@ -0,0 +1,36 @@
import numpy as np


class CharacterTable(object):
    """Given a set of characters:
    + Encode them to a one-hot integer representation
    + Decode the one-hot integer representation to their character output
    + Decode a vector of probabilities to their character output
    """

    def __init__(self, chars):
        """Initialize character table.
        # Arguments
            chars: Characters that can appear in the input.
        """
        self.chars = sorted(set(chars))
        self.char_indices = dict((c, i) for i, c in enumerate(self.chars))
        self.indices_char = dict((i, c) for i, c in enumerate(self.chars))

    def encode(self, input_string, num_rows):
        """One-hot encode the given string input_string.
        # Arguments
            num_rows: Number of rows in the returned one-hot encoding. This is
                used to keep the number of rows the same for every example.
        """
        x = np.zeros((num_rows, len(self.chars)))
        for i, c in enumerate(input_string):
            x[i, self.char_indices[c]] = 1
        return x

    def decode(self, x, calc_argmax=True):
        """Decode one-hot rows (or probability vectors) back to a string."""
        if calc_argmax:
            x = x.argmax(axis=-1)
        return ''.join(self.indices_char[i] for i in x)
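
A minimal usage sketch (not part of this commit); the character set and the padded string `'12+3 '` are illustrative assumptions:
```python
from src.character_encoder import CharacterTable

# Digits, '+', and the space character used for padding.
ctable = CharacterTable('0123456789+ ')

one_hot = ctable.encode('12+3 ', num_rows=5)  # shape (5, 12): one one-hot row per character
assert ctable.decode(one_hot) == '12+3 '      # argmax of each row recovers the original string
```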
Empty file added src/data/.gitkeep
Empty file added src/data/__init__.py
(Diff for the remaining changed files not shown.)
