diff --git a/.condarc b/.condarc new file mode 100644 index 0000000..364cb1a --- /dev/null +++ b/.condarc @@ -0,0 +1,7 @@ +channels: + - defaults + +show_channel_urls: True + +create_default_packages: + - nomkl diff --git a/CHANGELOG.md b/CHANGELOG.md new file mode 100644 index 0000000..17198e3 --- /dev/null +++ b/CHANGELOG.md @@ -0,0 +1,13 @@ +# Change Log + +All changes to this project will be documented in this file. + +Version number changes (major.minor.micro) in this package denote the following: +- A micro version will increase if the only change in a release is incrementing micro versions (bugfix-only releases) on the packages contained in this image. +- A minor version will increase if one or more packages contained in the Docker image add new, backwards-compatible features, or if a new package is added to the Docker image. +- A major version will increase if there are any backwards-incompatible changes in any of the packages contained in this Docker image, or any other backwards-incompatible changes in the execution environment. + +## [1.0.0] - 2017-01-17 + +* Initial Release + diff --git a/CODE_OF_CONDUCT.md b/CODE_OF_CONDUCT.md new file mode 100644 index 0000000..06a295e --- /dev/null +++ b/CODE_OF_CONDUCT.md @@ -0,0 +1,50 @@ +# Contributor Code of Conduct + +As contributors and maintainers of this project, and in the interest of +fostering an open and welcoming community, we pledge to respect all people who +contribute through reporting issues, posting feature requests, updating +documentation, submitting pull requests or patches, and other activities. + +We are committed to making participation in this project a harassment-free +experience for everyone, regardless of level of experience, gender, gender +identity and expression, sexual orientation, disability, personal appearance, +body size, race, ethnicity, age, religion, or nationality.
+ +Examples of unacceptable behavior by participants include: + +* The use of sexualized language or imagery +* Personal attacks +* Trolling or insulting/derogatory comments +* Public or private harassment +* Publishing other's private information, such as physical or electronic + addresses, without explicit permission +* Other unethical or unprofessional conduct + +Project maintainers have the right and responsibility to remove, edit, or +reject comments, commits, code, wiki edits, issues, and other contributions +that are not aligned to this Code of Conduct, or to ban temporarily or +permanently any contributor for other behaviors that they deem inappropriate, +threatening, offensive, or harmful. + +By adopting this Code of Conduct, project maintainers commit themselves to +fairly and consistently applying these principles to every aspect of managing +this project. Project maintainers who do not follow or enforce the Code of +Conduct may be permanently removed from the project team. + +This Code of Conduct applies both within project spaces and in public spaces +when an individual is representing the project or its community. + +Instances of abusive, harassing, or otherwise unacceptable behavior may be +reported by contacting a project maintainer at opensource@civisanalytics.com. +All complaints will be reviewed and investigated and will result in a response +that is deemed necessary and appropriate to the circumstances. Maintainers are +obligated to maintain confidentiality with regard to the reporter of an +incident. 
+ + +This Code of Conduct is adapted from the [Contributor Covenant][homepage], +version 1.3.0, available at +[http://contributor-covenant.org/version/1/3/0/][version] + +[homepage]: http://contributor-covenant.org +[version]: http://contributor-covenant.org/version/1/3/0/ diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md new file mode 100644 index 0000000..e437599 --- /dev/null +++ b/CONTRIBUTING.md @@ -0,0 +1,42 @@ +# Contributing to datascience-python + +We welcome bug reports and pull requests from everyone! +This project is intended to be a safe, welcoming space for collaboration, and +contributors are expected to adhere to the +[Contributor Covenant](http://contributor-covenant.org) code of conduct. + + +## Getting Started + +There are two ways to contribute: + +### File an Issue + +If you find a bug or think of a useful improvement, +file an issue in this GitHub repository. Make sure to +include details about what you think should be changed +and why. If you're reporting a bug, please provide +a minimum working example so that a maintainer can +reproduce the bug. + +### Modify the Docker image + +If you know exactly what needs to change, you can also +submit a pull request to propose the change. + +1. Fork it ( https://github.com/civisanalytics/datascience-python/fork ). +2. Make sure you are able to build the Docker image locally (`docker build -t datascience-python:test .`) +3. Create a feature branch (`git checkout -b my-new-feature`). +4. Make your change. +5. Make sure the new image still builds correctly. Test that your change is present in the new build. +6. Commit your changes (`git commit -am 'Add some feature'`). +7. Push to the branch (`git push origin my-new-feature`). +8. Create a new pull request with details about your changes. +9. If the build fails, address any issues. + +## Tips + +- Don’t forget to add your change to the [CHANGELOG](CHANGELOG.md). See + [Keep a CHANGELOG](http://keepachangelog.com/) for guidelines. 
+
+Thank you for taking the time to contribute!
diff --git a/Dockerfile b/Dockerfile
new file mode 100644
index 0000000..17ccd9f
--- /dev/null
+++ b/Dockerfile
@@ -0,0 +1,72 @@
+FROM ubuntu:14.04
+# MAINTAINER is deprecated; the contact address is recorded as a label instead.
+LABEL maintainer="support@civisanalytics.com"
+
+# Ensure UTF-8 locale.
+RUN locale-gen en_US.UTF-8
+
+# Set environment variables for UTF-8, conda, and shell environments.
+# PATH lists the "datascience" environment's bin directory ahead of the base
+# conda install.
+ENV LANG=en_US.UTF-8 \
+    LANGUAGE=en_US:en \
+    LC_ALL=en_US.UTF-8 \
+    CONDARC=/opt/conda/.condarc \
+    BASH_ENV=/etc/profile \
+    PATH=/opt/conda/envs/datascience/bin:/opt/conda/bin:$PATH
+
+# Install system libraries and build tools.
+# DEBIAN_FRONTEND is exported so that it covers every apt-get call in this
+# layer; as a bare prefix it would only apply to the first command of the
+# && chain. The apt package lists are removed in the same layer so they do
+# not end up in the image.
+RUN export DEBIAN_FRONTEND=noninteractive && \
+    apt-get update -y && \
+    apt-get install -y software-properties-common && \
+    apt-get install -y \
+        make \
+        automake \
+        libpq-dev \
+        libffi-dev \
+        gfortran \
+        g++ \
+        git \
+        libboost-program-options-dev \
+        libtool \
+        libxrender1 \
+        wget \
+        ca-certificates \
+        curl && \
+    rm -rf /var/lib/apt/lists/*
+
+# Install Miniconda into /opt/conda and pin conda itself.
+# The installer is written to an explicit /tmp path and deleted in the same
+# layer, and downloaded package tarballs are cleaned up afterwards.
+# NOTE(review): the installer is not checksum-verified; consider checking it
+# against the hash published for this Miniconda release.
+RUN echo 'export PATH=/opt/conda/bin:$PATH' > /etc/profile.d/conda.sh && \
+    wget --quiet -O /tmp/miniconda.sh https://repo.continuum.io/miniconda/Miniconda3-4.1.11-Linux-x86_64.sh && \
+    /bin/bash /tmp/miniconda.sh -b -p /opt/conda && \
+    rm /tmp/miniconda.sh && \
+    /opt/conda/bin/conda install --yes conda==4.1.11 && \
+    /opt/conda/bin/conda clean --tarballs --index-cache --yes
+
+# Activate the "datascience" environment for bash via /etc/bash.bashrc.
+RUN echo "source activate datascience" >> /etc/bash.bashrc
+
+# Red Hat and Debian use different names for this file. git2R wants the latter.
+# See conda-recipes GH 423
+RUN ln -s /opt/conda/lib/libopenblas.so /opt/conda/lib/libblas.so && \
+    ln -s /opt/conda/lib/libopenblas.so /opt/conda/lib/liblapack.so && \
+    ln -s /opt/conda/lib/libssl.so /opt/conda/lib/libssl.so.6 && \
+    ln -s /opt/conda/lib/libcrypto.so /opt/conda/lib/libcrypto.so.6
+
+# Install Python packages.
+# boto is installed in the base environment for private s3 channel support;
+# the "datascience" environment itself is created from environment.yml.
+COPY .condarc /opt/conda/.condarc
+COPY environment.yml environment.yml
+RUN conda install -y boto && \
+    conda install -y nomkl && \
+    conda env create -f environment.yml && \
+    conda clean --tarballs --index-cache --yes
diff --git a/LICENSE.md b/LICENSE.md
new file mode 100644
index 0000000..3a13e93
--- /dev/null
+++ b/LICENSE.md
@@ -0,0 +1,27 @@
+Copyright (c) 2017, Civis Analytics
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+1. Redistributions of source code must retain the above copyright notice, this
+   list of conditions and the following disclaimer.
+
+2. Redistributions in binary form must reproduce the above copyright notice,
+   this list of conditions and the following disclaimer in the documentation
+   and/or other materials provided with the distribution.
+
+3. Neither the name of the copyright holder nor the names of its contributors
+   may be used to endorse or promote products derived from this software
+   without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
diff --git a/README.md b/README.md new file mode 100644 index 0000000..25a3120 --- /dev/null +++ b/README.md @@ -0,0 +1,73 @@ +# Data Science Docker Image + +This image is created from the official Ubuntu 14.04 Docker image and contains popular Python packages for data science. + +# Introduction + +This repository defines the "[civisanalytics/datascience-python](https://hub.docker.com/r/civisanalytics/datascience-python/)" +Docker image. This Docker image provides an environment with data science tools +from the Python ecosystem. This image is the execution environment for Python +jobs in the [Civis data science platform](https://civisanalytics.com/products/civis-platform/), +and it includes the Civis [Python API client](https://github.com/civisanalytics/civis-python). + +# Installation + +Either build the Docker image locally +```bash +docker build -t datascience-python . +``` + +or download the image from DockerHub +```bash +docker pull civisanalytics/datascience-python:latest +``` + +The `latest` tag (Docker's default if you don't specify a tag) +will give you the most recently-built version of the datascience-python +image. You can replace the tag `latest` with a version number such as `1.0` +to retrieve a reproducible environment. + +# Usage + +Inside the datascience-python Docker image, Python packages are installed in a +conda environment named "datascience", which is automatically entered when a user +enters the container. For a full list of included Python libraries, see the +[environment.yml](environment.yml) file. 
+
+To start a Docker container from the datascience-python image and
+interact with it from a bash prompt, use
+```bash
+docker run -i -t civisanalytics/datascience-python:latest /bin/bash
+```
+
+You can run a Python command with
+```bash
+docker run civisanalytics/datascience-python:latest python -c "import pandas; print(pandas.__version__)"
+```
+
+# Contributing
+
+See [CONTRIBUTING](CONTRIBUTING.md) for information about contributing to this project.
+
+If you make any changes, be sure to build a container to verify that it successfully completes:
+```bash
+docker build -t datascience-python:test .
+```
+and describe any changes in the [change log](CHANGELOG.md).
+
+## For Maintainers
+
+This repo has autobuild enabled. Any PR that is merged to master will
+be built as the `latest` tag on Dockerhub.
+Once you are ready to create a new version, go to the "releases" tab of the repository and click
+"Draft a new release". Github will prompt you to create a new tag, release title, and release
+description. The tag should use semantic versioning in the form "vX.X.X"; "major.minor.micro".
+The title of the release should be the same as the tag. Include a change log in the release description.
+Once the release is tagged, DockerHub will automatically build three identical containers, with labels
+"major", "major.minor", and "major.minor.micro".
+
+# License
+
+BSD-3
+
+See [LICENSE.md](LICENSE.md) for details.
diff --git a/circle.yml b/circle.yml
new file mode 100644
index 0000000..0ed28ec
--- /dev/null
+++ b/circle.yml
@@ -0,0 +1,17 @@
+# CircleCI configuration: build the Docker image, then smoke-test it.
+machine:
+  services:
+    - docker
+
+dependencies:
+  override:
+    - docker build -t civisanalytics/datascience-python .
+
+test:
+  override:
+    # A container can be started from the image and run a shell command.
+    - docker run civisanalytics/datascience-python /bin/bash -c "echo BUILDS OK"
+    # scipy's compiled BLAS bindings can be imported.
+    - docker run civisanalytics/datascience-python python -c "from scipy.linalg import _fblas"
+    # matplotlib can build a plot using the Agg backend.
+    - docker run civisanalytics/datascience-python python -c "import numpy, matplotlib, os; matplotlib.use('Agg'); import matplotlib.pyplot as plt; x = numpy.arange(100); y = numpy.sin(x); plt.plot(x, y);"
diff --git a/environment.yml b/environment.yml
new file mode 100644
index 0000000..39c9d94
--- /dev/null
+++ b/environment.yml
@@ -0,0 +1,46 @@
+# Conda environment created in the image by `conda env create -f environment.yml`.
+# Every package is pinned to an exact version.
+name: datascience
+dependencies:
+- beautifulsoup4=4.5.1
+- cython=0.25.1
+- ipython=5.1.0
+- jsonschema=2.5.1
+- jupyter=1.0.0
+- jinja2=2.8
+- libffi=3.2.1
+- libgcc=5.2.0
+- libgfortran=3.0.0
+- libsodium=1.0.10
+- libtiff=4.0.6
+- libxml2=2.9.2
+- matplotlib=1.5.3
+- nomkl=1.0
+- nose=1.3.7
+- nltk=3.2.1
+- numexpr=2.6.1
+- numpy=1.11.2
+- openblas=0.2.14
+- pandas=0.19.1
+- patsy=0.4.1
+- psycopg2=2.6.2
+- pycrypto=2.6.1
+- pytest=3.0.5
+- python=3.5.2
+- pyyaml=3.11
+- requests=2.12.1
+- seaborn=0.7.1
+- scipy=0.18.1
+- scikit-learn=0.18.1
+- statsmodels=0.6.1
+# Packages installed with pip inside the environment.
+- pip:
+  - awscli==1.11.27
+  - boto==2.43.0
+  - boto3==1.4.2
+  - civis==1.1.0
+  - dropbox==7.1.1
+  - ftputil==3.3.1
+  - pysftp==0.2.9
+  - python-simple-hipchat==0.4.0
+  - urllib3==1.19