Deep Learning addition commands
pm3310 committed Apr 24, 2018
1 parent bbb5205 commit be8f1bc
Showing 24 changed files with 530 additions and 38 deletions.
2 changes: 2 additions & 0 deletions .flake8
@@ -0,0 +1,2 @@
[flake8]
max-line-length=100
55 changes: 18 additions & 37 deletions .gitignore
@@ -1,7 +1,6 @@
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# C extensions
*.so
@@ -20,7 +19,6 @@ lib64/
parts/
sdist/
var/
wheels/
*.egg-info/
.installed.cfg
*.egg
@@ -43,59 +41,42 @@ htmlcov/
.cache
nosetests.xml
coverage.xml
*.cover
.hypothesis/
*,cover

# Translations
*.mo
*.pot

# Django stuff:
*.log
local_settings.py

# Flask stuff:
instance/
.webassets-cache

# Scrapy stuff:
.scrapy

# Sphinx documentation
docs/_build/

# PyBuilder
target/

# Jupyter Notebook
.ipynb_checkpoints

# pyenv
.python-version

# celery beat schedule file
celerybeat-schedule
# DotEnv configuration
.env

# SageMath parsed files
*.sage.py
# Database
*.db
*.rdb

# dotenv
.env
# Pycharm
.idea

# virtualenv
.venv
venv/
ENV/
# VS Code
.vscode/

# Spyder project settings
.spyderproject
.spyproject
# Spyder
.spyproject/

# Rope project settings
.ropeproject
# Jupyter NB Checkpoints
.ipynb_checkpoints/

# mkdocs documentation
/site
# exclude data from source control by default
/data/

# mypy
.mypy_cache/
# Mac OS-specific storage files
.DS_Store
114 changes: 114 additions & 0 deletions Makefile
@@ -0,0 +1,114 @@
.PHONY: clean data lint requirements train

#################################################################################
# GLOBALS #
#################################################################################

PROJECT_DIR := $(shell dirname $(realpath $(lastword $(MAKEFILE_LIST))))
PROFILE = default
PROJECT_NAME = deep-learning-addition
PYTHON_INTERPRETER = python3

#################################################################################
# COMMANDS #
#################################################################################

## Install Python Dependencies
requirements: test_environment
	pip install -r requirements.txt

## Make Dataset
data: requirements
	PYTHONPATH='.' $(PYTHON_INTERPRETER) src/data/make_dataset.py ./data/processed/

## Train Deep Learning Model
train: requirements
	PYTHONPATH='.' $(PYTHON_INTERPRETER) src/models/train_model.py ./data/processed/ ./models/

## Delete all compiled Python files
clean:
	find . -type f -name "*.py[co]" -delete
	find . -type d -name "__pycache__" -delete

## Lint using flake8
lint:
	flake8 src

## Set up python interpreter environment
create_environment:
	@pip install -q virtualenv virtualenvwrapper
	@echo ">>> Installing virtualenvwrapper if not already installed.\nMake sure the following lines are in shell startup file\n\
	export WORKON_HOME=$$HOME/.virtualenvs\nexport PROJECT_HOME=$$HOME/Devel\nsource /usr/local/bin/virtualenvwrapper.sh\n"
	@bash -c "source `which virtualenvwrapper.sh`;mkvirtualenv $(PROJECT_NAME) --python=$(PYTHON_INTERPRETER)"
	@echo ">>> New virtualenv created. Activate with:\nworkon $(PROJECT_NAME)"

## Test python environment is set up correctly
test_environment:
	$(PYTHON_INTERPRETER) test_environment.py

#################################################################################
# PROJECT RULES #
#################################################################################



#################################################################################
# Self Documenting Commands #
#################################################################################

.DEFAULT_GOAL := show-help

# Inspired by <http://marmelab.com/blog/2016/02/29/auto-documented-makefile.html>
# sed script explained:
# /^##/:
# * save line in hold space
# * purge line
# * Loop:
# * append newline + line to hold space
# * go to next line
# * if line starts with doc comment, strip comment character off and loop
# * remove target prerequisites
# * append hold space (+ newline) to line
# * replace newline plus comments by `---`
# * print line
# Separate expressions are necessary because labels cannot be delimited by
# semicolon; see <http://stackoverflow.com/a/11799865/1968>
.PHONY: show-help
show-help:
	@echo "$$(tput bold)Available rules:$$(tput sgr0)"
	@echo
	@sed -n -e "/^## / { \
		h; \
		s/.*//; \
		:doc" \
		-e "H; \
		n; \
		s/^## //; \
		t doc" \
		-e "s/:.*//; \
		G; \
		s/\\n## /---/; \
		s/\\n/ /g; \
		p; \
	}" ${MAKEFILE_LIST} \
	| LC_ALL='C' sort --ignore-case \
	| awk -F '---' \
		-v ncol=$$(tput cols) \
		-v indent=19 \
		-v col_on="$$(tput setaf 6)" \
		-v col_off="$$(tput sgr0)" \
	'{ \
		printf "%s%*s%s ", col_on, -indent, $$1, col_off; \
		n = split($$2, words, " "); \
		line_length = ncol - indent; \
		for (i = 1; i <= n; i++) { \
			line_length -= length(words[i]) + 1; \
			if (line_length <= 0) { \
				line_length = ncol - indent - length(words[i]) - 1; \
				printf "\n%*s ", -indent, " "; \
			} \
			printf "%s ", words[i]; \
		} \
		printf "\n"; \
	}' \
	| more $(shell test $(shell uname) = Darwin && echo '--no-init --raw-control-chars')
19 changes: 18 additions & 1 deletion README.md
@@ -1,2 +1,19 @@
# deep-learning-addition
An implementation of sequence to sequence learning for performing addition
An LSTM model that learns to perform arithmetic addition on integers: a simple Deep Learning calculator.

## Development

- Python 3.6 and 2.7
- For Python 2.7, set `REQUIRED_PYTHON` in `test_environment.py` and `PYTHON_INTERPRETER` in the `Makefile` to `python2`
- [awscli](https://pypi.python.org/pypi/awscli) installed and configured

## Commands
```
clean Delete all compiled Python files
create_environment Set up python interpreter environment
data Make Dataset
lint Lint using flake8
requirements Install Python Dependencies
test_environment Test python environment is set up correctly
train Train Deep Learning Model
```
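
A typical workflow, assuming the commands are run from the project root (the `data` and `train` targets write to `./data/processed/` and `./models/`, as defined in the `Makefile`):
```
make create_environment
workon deep-learning-addition
make requirements
make data
make train
```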
Empty file added models/.gitkeep
Empty file added notebooks/.gitkeep
Empty file added references/.gitkeep
Empty file added reports/.gitkeep
Empty file added reports/figures/.gitkeep
51 changes: 51 additions & 0 deletions requirements.txt
@@ -0,0 +1,51 @@
absl-py==0.1.13
alabaster==0.7.10
astor==0.6.2
awscli==1.15.4
Babel==2.5.3
bleach==1.5.0
botocore==1.10.4
certifi==2018.4.16
chardet==3.0.4
click==6.7
colorama==0.3.7
coverage==4.5.1
docutils==0.14
flake8==3.5.0
gast==0.2.0
grpcio==1.11.0
h5py==2.7.1
html5lib==0.9999999
idna==2.6
imagesize==1.0.0
Jinja2==2.10
jmespath==0.9.3
Keras==2.1.5
Markdown==2.6.11
MarkupSafe==1.0
mccabe==0.6.1
numpy==1.14.2
packaging==17.1
protobuf==3.5.2.post1
pyasn1==0.4.2
pycodestyle==2.3.1
pyflakes==1.6.0
Pygments==2.2.0
pyparsing==2.2.0
python-dateutil==2.6.1
python-dotenv==0.8.2
pytz==2018.4
PyYAML==3.12
requests==2.18.4
rsa==3.4.2
s3transfer==0.1.13
scipy==1.0.1
six==1.11.0
snowballstemmer==1.2.1
Sphinx==1.7.2
sphinxcontrib-websupport==1.0.1
tensorboard==1.7.0
tensorflow==1.7.0
termcolor==1.1.0
urllib3==1.22
Werkzeug==0.14.1
Empty file added src/__init__.py
36 changes: 36 additions & 0 deletions src/character_encoder.py
@@ -0,0 +1,36 @@
import numpy as np


class CharacterTable(object):
    """Given a set of characters:
    + Encode them to a one-hot integer representation
    + Decode the one-hot integer representation to their character output
    + Decode a vector of probabilities to their character output
    """

    def __init__(self, chars):
        """Initialize character table.
        # Arguments
            chars: Characters that can appear in the input.
        """
        self.chars = sorted(set(chars))
        self.char_indices = dict((c, i) for i, c in enumerate(self.chars))
        self.indices_char = dict((i, c) for i, c in enumerate(self.chars))

    def encode(self, input_string, num_rows):
        """One-hot encode the given string input_string.
        # Arguments
            num_rows: Number of rows in the returned one-hot encoding. This is
                used to keep the number of rows the same for every example.
        """
        x = np.zeros((num_rows, len(self.chars)))
        for i, c in enumerate(input_string):
            x[i, self.char_indices[c]] = 1
        return x

    def decode(self, x, calc_argmax=True):
        """Decode one-hot rows (or probability vectors) back to a string."""
        if calc_argmax:
            x = x.argmax(axis=-1)
        return ''.join(self.indices_char[i] for i in x)
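
A minimal usage sketch (not part of this commit); the character set and the padded string `'12+3 '` are illustrative assumptions:
```python
from src.character_encoder import CharacterTable

# Digits, '+', and the space character used for padding.
ctable = CharacterTable('0123456789+ ')

one_hot = ctable.encode('12+3 ', num_rows=5)  # shape (5, 12): one one-hot row per character
assert ctable.decode(one_hot) == '12+3 '      # argmax of each row recovers the original string
```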
Empty file added src/data/.gitkeep
Empty file added src/data/__init__.py
(Diff for the remaining changed files not shown.)
