From c4be98614ed89b292ba7f39381e2eca0f2d3f58d Mon Sep 17 00:00:00 2001 From: bot Date: Tue, 18 Jun 2024 07:21:29 +0000 Subject: [PATCH] [automation] transform lesson to sandpaper --- .github/workflows/README.md | 198 +++++++ .github/workflows/pr-close-signal.yaml | 23 + .github/workflows/pr-comment.yaml | 185 +++++++ .github/workflows/pr-post-remove-branch.yaml | 32 ++ .github/workflows/pr-preflight.yaml | 39 ++ .github/workflows/pr-receive.yaml | 131 +++++ .github/workflows/sandpaper-main.yaml | 61 +++ .github/workflows/sandpaper-version.txt | 1 + .github/workflows/update-cache.yaml | 125 +++++ .github/workflows/update-workflows.yaml | 66 +++ .github/workflows/workbench-beta-phase.yml | 60 +++ .gitignore | 43 +- CODE_OF_CONDUCT.md | 8 +- CONTRIBUTING.md | 258 ++++----- LICENSE.md | 99 ++-- README.md | 35 +- _extras/figures.md | 68 --- config.yaml | 90 ++++ episodes/advanced-containers.md | 505 ++++++++++++++++++ episodes/creating-container-images.md | 362 +++++++++++++ {_extras => episodes/data}/.gitkeep | 0 episodes/docker-hub.md | 195 +++++++ episodes/docker-image-examples.md | 83 +++ {data => episodes/fig}/.gitkeep | 0 .../fig}/github-gh-pages-branch.png | Bin {fig => episodes/fig}/github-io-pages.png | Bin {fig => episodes/fig}/github-main-branch.png | Bin {fig => episodes/files}/.gitkeep | 0 {files => episodes/files}/docker-intro.zip | Bin episodes/introduction.md | 214 ++++++++ episodes/managing-containers.md | 184 +++++++ episodes/meet-docker.md | 364 +++++++++++++ episodes/reproduciblity.md | 171 ++++++ episodes/running-containers.md | 372 +++++++++++++ files/.gitkeep | 0 index.md | 117 ++-- .../06-containers-on-the-cloud.md | 50 +- {_extras => instructors}/08-orchestration.md | 45 +- {_extras => instructors}/about.md | 5 +- .../e01-github-actions.md | 139 ++--- .../e02-jekyll-lesson-example.md | 72 ++- .../instructor-notes.md | 170 +++--- {_extras => learners}/discuss.md | 5 +- reference.md => learners/reference.md | 10 +- setup.md => learners/setup.md | 107 ++-- profiles/learner-profiles.md | 5 + site/README.md | 2 + 47 files changed, 4107 insertions(+), 592 deletions(-) create mode 100644 .github/workflows/README.md create mode 100644 .github/workflows/pr-close-signal.yaml create mode 100644 .github/workflows/pr-comment.yaml create mode 100644 .github/workflows/pr-post-remove-branch.yaml create mode 100644 .github/workflows/pr-preflight.yaml create mode 100644 .github/workflows/pr-receive.yaml create mode 100644 .github/workflows/sandpaper-main.yaml create mode 100644 .github/workflows/sandpaper-version.txt create mode 100644 .github/workflows/update-cache.yaml create mode 100644 .github/workflows/update-workflows.yaml create mode 100644 .github/workflows/workbench-beta-phase.yml delete mode 100644 _extras/figures.md create mode 100644 config.yaml create mode 100644 episodes/advanced-containers.md create mode 100644 episodes/creating-container-images.md rename {_extras => episodes/data}/.gitkeep (100%) create mode 100644 episodes/docker-hub.md create mode 100644 episodes/docker-image-examples.md rename {data => episodes/fig}/.gitkeep (100%) rename {fig => episodes/fig}/github-gh-pages-branch.png (100%) rename {fig => episodes/fig}/github-io-pages.png (100%) rename {fig => episodes/fig}/github-main-branch.png (100%) rename {fig => episodes/files}/.gitkeep (100%) rename {files => episodes/files}/docker-intro.zip (100%) create mode 100644 episodes/introduction.md create mode 100644 episodes/managing-containers.md create mode 100644 episodes/meet-docker.md create mode 100644 
episodes/reproduciblity.md
 create mode 100644 episodes/running-containers.md
 delete mode 100644 files/.gitkeep
 rename {_extras => instructors}/06-containers-on-the-cloud.md (78%)
 rename {_extras => instructors}/08-orchestration.md (62%)
 rename {_extras => instructors}/about.md (69%)
 rename {_extras => instructors}/e01-github-actions.md (85%)
 rename {_extras => instructors}/e02-jekyll-lesson-example.md (76%)
 rename _extras/guide.md => instructors/instructor-notes.md (55%)
 rename {_extras => learners}/discuss.md (58%)
 rename reference.md => learners/reference.md (94%)
 rename setup.md => learners/setup.md (73%)
 create mode 100644 profiles/learner-profiles.md
 create mode 100644 site/README.md
diff --git a/.github/workflows/README.md b/.github/workflows/README.md
new file mode 100644
index 000000000..d6edf88dd
--- /dev/null
+++ b/.github/workflows/README.md
@@ -0,0 +1,198 @@
+# Carpentries Workflows
+
+This directory contains workflows to be used for lessons using the {sandpaper} lesson infrastructure. Two of these workflows require R (`sandpaper-main.yaml` and `pr-receive.yaml`); the rest are bots that handle pull request management.
+
+These workflows will likely change as {sandpaper} evolves, so it is important to keep them up to date. To do this in your lesson, run the following in your R console:
+
+```r
+# Install/Update sandpaper
+options(repos = c(carpentries = "https://carpentries.r-universe.dev/",
+                  CRAN = "https://cloud.r-project.org"))
+install.packages("sandpaper")
+
+# update the workflows in your lesson
+library("sandpaper")
+update_github_workflows()
+```
+
+Inside this folder, you will find a file called `sandpaper-version.txt`, which contains a version number for sandpaper. This will be used in the future to alert you if a workflow update is needed.
+
+What follows are descriptions of the workflow files:
+
+## Deployment
+
+### 01 Build and Deploy (sandpaper-main.yaml)
+
+This is the main driver, which acts only on the main branch of the repository. This workflow does the following:
+
+1. checks out the lesson
+2. provisions the following resources
+   - R
+   - pandoc
+   - lesson infrastructure (stored in a cache)
+   - lesson dependencies if needed (stored in a cache)
+3. builds the lesson via `sandpaper:::ci_deploy()`
+
+#### Caching
+
+This workflow has two caches: one is for the lesson infrastructure and the other is for the lesson dependencies if the lesson contains rendered content. These caches are invalidated by new versions of the infrastructure and the `renv.lock` file, respectively. If there is a problem with a cache, manual invalidation is necessary. You will need maintainer access to the repository; you can either go to the Actions tab and [click on the caches button to find and invalidate the failing cache](https://github.blog/changelog/2022-10-20-manage-caches-in-your-actions-workflows-from-web-interface/), or set the `CACHE_VERSION` secret to the current date (which will invalidate all of the caches).
+
+## Updates
+
+### Setup Information
+
+These workflows run on a schedule and at the maintainer's request. Because they create pull requests that update workflows and require the downstream actions to run, they need a special repository/organization secret token called `SANDPAPER_WORKFLOW`, and it must have the `public_repo` and `workflow` scopes.
+
+This can be an individual user token, OR it can be a trusted bot account.
+If you have a repository in one of the official Carpentries accounts, then you do not need to worry about this token being present, because the Carpentries Core Team will take care of supplying it.
+
+If you want to use your personal account: you can go to
+
+to create a token. Once you have created your token, you should copy it to your clipboard and then go to your repository's Settings > Secrets > Actions and create or edit the `SANDPAPER_WORKFLOW` secret, pasting in the generated token.
+
+If you do not specify your token correctly, the runs will not fail, but they will give you instructions to provide the token for your repository.
+
+### 02 Maintain: Update Workflow Files (update-workflows.yaml)
+
+The {sandpaper} repository was designed to separate the tools from the content as much as possible. For local builds, this is absolutely true, but there is a minor issue when it comes to workflow files: they must live inside the repository.
+
+This workflow ensures that the workflow files are up to date. The way it works is to download the update-workflows.sh script from GitHub and run it. The script will do the following:
+
+1. check the recorded version of sandpaper against the current version on GitHub
+2. update the files if there is a difference in versions
+
+After the files are updated, if there are any changes, they are pushed to a branch called `update/workflows` and a pull request is created. Maintainers are encouraged to review the changes and accept the pull request if the outputs are okay.
+
+This update is run ~~weekly or~~ on demand.
+
+### 03 Maintain: Update Package Cache (update-cache.yaml)
+
+For lessons that have generated content, we use {renv} to ensure that the output is stable. This is controlled by a single lockfile which documents the packages needed for the lesson and their version numbers. This workflow is skipped in lessons that do not have generated content.
+
+Because the lessons need to remain current with the package ecosystem, it's a good idea to make sure these packages can be updated periodically. The update cache workflow does this by checking for updates, applying them in a branch called `update/packages`, and creating a pull request with _only the lockfile changed_.
+
+From here, the markdown documents will be rebuilt and you can inspect what has changed based on how the packages have been updated.
+
+## Pull Request and Review Management
+
+Because our lessons execute code, pull requests are a security risk for any lesson and thus have security measures associated with them. **Do not merge any pull requests that do not pass checks and do not have bot comments on them.**
+
+These workflows all go together and are described in the following diagram and the sections below:
+
+![Graph representation of a pull request](https://carpentries.github.io/sandpaper/articles/img/pr-flow.dot.svg)
+
+### Pre Flight Pull Request Validation (pr-preflight.yaml)
+
+This workflow runs every time a pull request is created, and its purpose is to validate that the pull request is okay to run. This means checking the following:
+
+1. The pull request does not contain modified workflow files.
+2. If the pull request contains modified workflow files, it does not contain modified content files (such as a situation where @carpentries-bot will make an automated pull request).
+3. The pull request does not contain an invalid commit hash (e.g. from a fork that was made before a lesson was transitioned from styles to use the workbench).
+
+Once the checks are finished, a comment is issued to the pull request, which will allow maintainers to determine if it is safe to run the "Receive Pull Request" workflow from new contributors.
+
+### Receive Pull Request (pr-receive.yaml)
+
+**Note of caution:** This workflow runs arbitrary code submitted by anyone who creates a pull request. GitHub has safeguarded the token used in this workflow to have no privileges in the repository, but we have taken precautions to protect against spoofing.
+
+This workflow is triggered by every push to a pull request. If this workflow is already running and a new push is sent to the pull request, the workflow run from the previous push will be cancelled and a new workflow run will be started.
+
+The first step of this workflow is to check that it is valid (e.g. that no workflow files have been modified). If any workflow files have been modified, a comment is made indicating that the workflow will not be run. If both a workflow file and lesson content are modified, an error will occur.
+
+The second step (if valid) is to build the generated content from the pull request. This builds the content and uploads three artifacts:
+
+1. The pull request number (pr)
+2. A summary of changes after the rendering process (diff)
+3. The rendered files (build)
+
+Because this workflow builds generated content, it follows the same general process as the `sandpaper-main` workflow, with the same caching mechanisms.
+
+The artifacts produced are used by the next workflow.
+
+### Comment on Pull Request (pr-comment.yaml)
+
+This workflow is triggered if the `pr-receive.yaml` workflow is successful. The steps in this workflow are:
+
+1. Test if the workflow is valid and comment the validity of the workflow to the pull request.
+2. If it is valid: create an orphan branch with two commits: the current state of the repository and the proposed changes.
+3. If it is valid: update the pull request comment with the summary of changes.
+
+Importantly: if the pull request is invalid, the branch is not created, so any malicious code is not published.
+
+From here, the maintainer can request changes from the author and eventually either merge or reject the PR. When this happens, if the PR was valid, the preview branch needs to be deleted.
+
+### Send Close PR Signal (pr-close-signal.yaml)
+
+Triggered any time a pull request is closed. This emits an artifact containing the pull request number for the next action.
+
+### Remove Pull Request Branch (pr-post-remove-branch.yaml)
+
+Triggered by `pr-close-signal.yaml`. This removes the temporary branch associated with the pull request (if it was created).
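+
+Both of the manual interventions mentioned above -- invalidating the caches (see the Caching section) and cleaning up a leftover preview branch -- can also be done from the command line. The following is a minimal sketch rather than part of the workflows themselves; it assumes you have maintainer access and the GitHub CLI (`gh`) installed and authenticated, and the repository slug and pull request number are placeholders:
+
+```bash
+# Invalidate all workflow caches by setting CACHE_VERSION to today's date
+gh secret set CACHE_VERSION --repo <org>/<lesson-repo> --body "$(date +%Y-%m-%d)"
+
+# Remove a leftover preview branch for pull request <NR>
+# (pr-comment.yaml names these branches md-outputs-PR-<NR>)
+git push origin --delete md-outputs-PR-<NR>
+```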
diff --git a/.github/workflows/pr-close-signal.yaml b/.github/workflows/pr-close-signal.yaml new file mode 100644 index 000000000..9b129d5d2 --- /dev/null +++ b/.github/workflows/pr-close-signal.yaml @@ -0,0 +1,23 @@ +name: "Bot: Send Close Pull Request Signal" + +on: + pull_request: + types: + [closed] + +jobs: + send-close-signal: + name: "Send closing signal" + runs-on: ubuntu-latest + if: ${{ github.event.action == 'closed' }} + steps: + - name: "Create PRtifact" + run: | + mkdir -p ./pr + printf ${{ github.event.number }} > ./pr/NUM + - name: Upload Diff + uses: actions/upload-artifact@v3 + with: + name: pr + path: ./pr + diff --git a/.github/workflows/pr-comment.yaml b/.github/workflows/pr-comment.yaml new file mode 100644 index 000000000..bb2eb03cd --- /dev/null +++ b/.github/workflows/pr-comment.yaml @@ -0,0 +1,185 @@ +name: "Bot: Comment on the Pull Request" + +# read-write repo token +# access to secrets +on: + workflow_run: + workflows: ["Receive Pull Request"] + types: + - completed + +concurrency: + group: pr-${{ github.event.workflow_run.pull_requests[0].number }} + cancel-in-progress: true + + +jobs: + # Pull requests are valid if: + # - they match the sha of the workflow run head commit + # - they are open + # - no .github files were committed + test-pr: + name: "Test if pull request is valid" + runs-on: ubuntu-latest + if: > + github.event.workflow_run.event == 'pull_request' && + github.event.workflow_run.conclusion == 'success' + outputs: + is_valid: ${{ steps.check-pr.outputs.VALID }} + payload: ${{ steps.check-pr.outputs.payload }} + number: ${{ steps.get-pr.outputs.NUM }} + msg: ${{ steps.check-pr.outputs.MSG }} + steps: + - name: 'Download PR artifact' + id: dl + uses: carpentries/actions/download-workflow-artifact@main + with: + run: ${{ github.event.workflow_run.id }} + name: 'pr' + + - name: "Get PR Number" + if: ${{ steps.dl.outputs.success == 'true' }} + id: get-pr + run: | + unzip pr.zip + echo "NUM=$(<./NR)" >> $GITHUB_OUTPUT + + - name: "Fail if PR number was not present" + id: bad-pr + if: ${{ steps.dl.outputs.success != 'true' }} + run: | + echo '::error::A pull request number was not recorded. The pull request that triggered this workflow is likely malicious.' 
+ exit 1 + - name: "Get Invalid Hashes File" + id: hash + run: | + echo "json<> $GITHUB_OUTPUT + - name: "Check PR" + id: check-pr + if: ${{ steps.dl.outputs.success == 'true' }} + uses: carpentries/actions/check-valid-pr@main + with: + pr: ${{ steps.get-pr.outputs.NUM }} + sha: ${{ github.event.workflow_run.head_sha }} + headroom: 3 # if it's within the last three commits, we can keep going, because it's likely rapid-fire + invalid: ${{ fromJSON(steps.hash.outputs.json)[github.repository] }} + fail_on_error: true + + # Create an orphan branch on this repository with two commits + # - the current HEAD of the md-outputs branch + # - the output from running the current HEAD of the pull request through + # the md generator + create-branch: + name: "Create Git Branch" + needs: test-pr + runs-on: ubuntu-latest + if: ${{ needs.test-pr.outputs.is_valid == 'true' }} + env: + NR: ${{ needs.test-pr.outputs.number }} + permissions: + contents: write + steps: + - name: 'Checkout md outputs' + uses: actions/checkout@v3 + with: + ref: md-outputs + path: built + fetch-depth: 1 + + - name: 'Download built markdown' + id: dl + uses: carpentries/actions/download-workflow-artifact@main + with: + run: ${{ github.event.workflow_run.id }} + name: 'built' + + - if: ${{ steps.dl.outputs.success == 'true' }} + run: unzip built.zip + + - name: "Create orphan and push" + if: ${{ steps.dl.outputs.success == 'true' }} + run: | + cd built/ + git config --local user.email "actions@github.com" + git config --local user.name "GitHub Actions" + CURR_HEAD=$(git rev-parse HEAD) + git checkout --orphan md-outputs-PR-${NR} + git add -A + git commit -m "source commit: ${CURR_HEAD}" + ls -A | grep -v '^.git$' | xargs -I _ rm -r '_' + cd .. + unzip -o -d built built.zip + cd built + git add -A + git commit --allow-empty -m "differences for PR #${NR}" + git push -u --force --set-upstream origin md-outputs-PR-${NR} + + # Comment on the Pull Request with a link to the branch and the diff + comment-pr: + name: "Comment on Pull Request" + needs: [test-pr, create-branch] + runs-on: ubuntu-latest + if: ${{ needs.test-pr.outputs.is_valid == 'true' }} + env: + NR: ${{ needs.test-pr.outputs.number }} + permissions: + pull-requests: write + steps: + - name: 'Download comment artifact' + id: dl + uses: carpentries/actions/download-workflow-artifact@main + with: + run: ${{ github.event.workflow_run.id }} + name: 'diff' + + - if: ${{ steps.dl.outputs.success == 'true' }} + run: unzip ${{ github.workspace }}/diff.zip + + - name: "Comment on PR" + id: comment-diff + if: ${{ steps.dl.outputs.success == 'true' }} + uses: carpentries/actions/comment-diff@main + with: + pr: ${{ env.NR }} + path: ${{ github.workspace }}/diff.md + + # Comment if the PR is open and matches the SHA, but the workflow files have + # changed + comment-changed-workflow: + name: "Comment if workflow files have changed" + needs: test-pr + runs-on: ubuntu-latest + if: ${{ always() && needs.test-pr.outputs.is_valid == 'false' }} + env: + NR: ${{ github.event.workflow_run.pull_requests[0].number }} + body: ${{ needs.test-pr.outputs.msg }} + permissions: + pull-requests: write + steps: + - name: 'Check for spoofing' + id: dl + uses: carpentries/actions/download-workflow-artifact@main + with: + run: ${{ github.event.workflow_run.id }} + name: 'built' + + - name: 'Alert if spoofed' + id: spoof + if: ${{ steps.dl.outputs.success == 'true' }} + run: | + echo 'body<> $GITHUB_ENV + echo '' >> $GITHUB_ENV + echo '## :x: DANGER :x:' >> $GITHUB_ENV + echo 'This pull request has modified 
workflows that created output. Close this now.' >> $GITHUB_ENV + echo '' >> $GITHUB_ENV + echo 'EOF' >> $GITHUB_ENV + + - name: "Comment on PR" + id: comment-diff + uses: carpentries/actions/comment-diff@main + with: + pr: ${{ env.NR }} + body: ${{ env.body }} + diff --git a/.github/workflows/pr-post-remove-branch.yaml b/.github/workflows/pr-post-remove-branch.yaml new file mode 100644 index 000000000..62c2e98d4 --- /dev/null +++ b/.github/workflows/pr-post-remove-branch.yaml @@ -0,0 +1,32 @@ +name: "Bot: Remove Temporary PR Branch" + +on: + workflow_run: + workflows: ["Bot: Send Close Pull Request Signal"] + types: + - completed + +jobs: + delete: + name: "Delete branch from Pull Request" + runs-on: ubuntu-latest + if: > + github.event.workflow_run.event == 'pull_request' && + github.event.workflow_run.conclusion == 'success' + permissions: + contents: write + steps: + - name: 'Download artifact' + uses: carpentries/actions/download-workflow-artifact@main + with: + run: ${{ github.event.workflow_run.id }} + name: pr + - name: "Get PR Number" + id: get-pr + run: | + unzip pr.zip + echo "NUM=$(<./NUM)" >> $GITHUB_OUTPUT + - name: 'Remove branch' + uses: carpentries/actions/remove-branch@main + with: + pr: ${{ steps.get-pr.outputs.NUM }} diff --git a/.github/workflows/pr-preflight.yaml b/.github/workflows/pr-preflight.yaml new file mode 100644 index 000000000..d0d7420dc --- /dev/null +++ b/.github/workflows/pr-preflight.yaml @@ -0,0 +1,39 @@ +name: "Pull Request Preflight Check" + +on: + pull_request_target: + branches: + ["main"] + types: + ["opened", "synchronize", "reopened"] + +jobs: + test-pr: + name: "Test if pull request is valid" + if: ${{ github.event.action != 'closed' }} + runs-on: ubuntu-latest + outputs: + is_valid: ${{ steps.check-pr.outputs.VALID }} + permissions: + pull-requests: write + steps: + - name: "Get Invalid Hashes File" + id: hash + run: | + echo "json<> $GITHUB_OUTPUT + - name: "Check PR" + id: check-pr + uses: carpentries/actions/check-valid-pr@main + with: + pr: ${{ github.event.number }} + invalid: ${{ fromJSON(steps.hash.outputs.json)[github.repository] }} + fail_on_error: true + - name: "Comment result of validation" + id: comment-diff + if: ${{ always() }} + uses: carpentries/actions/comment-diff@main + with: + pr: ${{ github.event.number }} + body: ${{ steps.check-pr.outputs.MSG }} diff --git a/.github/workflows/pr-receive.yaml b/.github/workflows/pr-receive.yaml new file mode 100644 index 000000000..371ef542b --- /dev/null +++ b/.github/workflows/pr-receive.yaml @@ -0,0 +1,131 @@ +name: "Receive Pull Request" + +on: + pull_request: + types: + [opened, synchronize, reopened] + +concurrency: + group: ${{ github.ref }} + cancel-in-progress: true + +jobs: + test-pr: + name: "Record PR number" + if: ${{ github.event.action != 'closed' }} + runs-on: ubuntu-latest + outputs: + is_valid: ${{ steps.check-pr.outputs.VALID }} + steps: + - name: "Record PR number" + id: record + if: ${{ always() }} + run: | + echo ${{ github.event.number }} > ${{ github.workspace }}/NR # 2022-03-02: artifact name fixed to be NR + - name: "Upload PR number" + id: upload + if: ${{ always() }} + uses: actions/upload-artifact@v3 + with: + name: pr + path: ${{ github.workspace }}/NR + - name: "Get Invalid Hashes File" + id: hash + run: | + echo "json<> $GITHUB_OUTPUT + - name: "echo output" + run: | + echo "${{ steps.hash.outputs.json }}" + - name: "Check PR" + id: check-pr + uses: carpentries/actions/check-valid-pr@main + with: + pr: ${{ github.event.number }} + invalid: ${{ 
fromJSON(steps.hash.outputs.json)[github.repository] }} + + build-md-source: + name: "Build markdown source files if valid" + needs: test-pr + runs-on: ubuntu-latest + if: ${{ needs.test-pr.outputs.is_valid == 'true' }} + env: + GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} + RENV_PATHS_ROOT: ~/.local/share/renv/ + CHIVE: ${{ github.workspace }}/site/chive + PR: ${{ github.workspace }}/site/pr + MD: ${{ github.workspace }}/site/built + steps: + - name: "Check Out Main Branch" + uses: actions/checkout@v3 + + - name: "Check Out Staging Branch" + uses: actions/checkout@v3 + with: + ref: md-outputs + path: ${{ env.MD }} + + - name: "Set up R" + uses: r-lib/actions/setup-r@v2 + with: + use-public-rspm: true + install-r: false + + - name: "Set up Pandoc" + uses: r-lib/actions/setup-pandoc@v2 + + - name: "Setup Lesson Engine" + uses: carpentries/actions/setup-sandpaper@main + with: + cache-version: ${{ secrets.CACHE_VERSION }} + + - name: "Setup Package Cache" + uses: carpentries/actions/setup-lesson-deps@main + with: + cache-version: ${{ secrets.CACHE_VERSION }} + + - name: "Validate and Build Markdown" + id: build-site + run: | + sandpaper::package_cache_trigger(TRUE) + sandpaper::validate_lesson(path = '${{ github.workspace }}') + sandpaper:::build_markdown(path = '${{ github.workspace }}', quiet = FALSE) + shell: Rscript {0} + + - name: "Generate Artifacts" + id: generate-artifacts + run: | + sandpaper:::ci_bundle_pr_artifacts( + repo = '${{ github.repository }}', + pr_number = '${{ github.event.number }}', + path_md = '${{ env.MD }}', + path_pr = '${{ env.PR }}', + path_archive = '${{ env.CHIVE }}', + branch = 'md-outputs' + ) + shell: Rscript {0} + + - name: "Upload PR" + uses: actions/upload-artifact@v3 + with: + name: pr + path: ${{ env.PR }} + + - name: "Upload Diff" + uses: actions/upload-artifact@v3 + with: + name: diff + path: ${{ env.CHIVE }} + retention-days: 1 + + - name: "Upload Build" + uses: actions/upload-artifact@v3 + with: + name: built + path: ${{ env.MD }} + retention-days: 1 + + - name: "Teardown" + run: sandpaper::reset_site() + shell: Rscript {0} diff --git a/.github/workflows/sandpaper-main.yaml b/.github/workflows/sandpaper-main.yaml new file mode 100644 index 000000000..e17707acd --- /dev/null +++ b/.github/workflows/sandpaper-main.yaml @@ -0,0 +1,61 @@ +name: "01 Build and Deploy Site" + +on: + push: + branches: + - main + - master + schedule: + - cron: '0 0 * * 2' + workflow_dispatch: + inputs: + name: + description: 'Who triggered this build?' 
+ required: true + default: 'Maintainer (via GitHub)' + reset: + description: 'Reset cached markdown files' + required: false + default: false + type: boolean +jobs: + full-build: + name: "Build Full Site" + runs-on: ubuntu-latest + permissions: + checks: write + contents: write + pages: write + env: + GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} + RENV_PATHS_ROOT: ~/.local/share/renv/ + steps: + + - name: "Checkout Lesson" + uses: actions/checkout@v3 + + - name: "Set up R" + uses: r-lib/actions/setup-r@v2 + with: + use-public-rspm: true + install-r: false + + - name: "Set up Pandoc" + uses: r-lib/actions/setup-pandoc@v2 + + - name: "Setup Lesson Engine" + uses: carpentries/actions/setup-sandpaper@main + with: + cache-version: ${{ secrets.CACHE_VERSION }} + + - name: "Setup Package Cache" + uses: carpentries/actions/setup-lesson-deps@main + with: + cache-version: ${{ secrets.CACHE_VERSION }} + + - name: "Deploy Site" + run: | + reset <- "${{ github.event.inputs.reset }}" == "true" + sandpaper::package_cache_trigger(TRUE) + sandpaper:::ci_deploy(reset = reset) + shell: Rscript {0} diff --git a/.github/workflows/sandpaper-version.txt b/.github/workflows/sandpaper-version.txt new file mode 100644 index 000000000..201a22c8f --- /dev/null +++ b/.github/workflows/sandpaper-version.txt @@ -0,0 +1 @@ +0.16.2 diff --git a/.github/workflows/update-cache.yaml b/.github/workflows/update-cache.yaml new file mode 100644 index 000000000..676d7424c --- /dev/null +++ b/.github/workflows/update-cache.yaml @@ -0,0 +1,125 @@ +name: "03 Maintain: Update Package Cache" + +on: + workflow_dispatch: + inputs: + name: + description: 'Who triggered this build (enter github username to tag yourself)?' + required: true + default: 'monthly run' + schedule: + # Run every tuesday + - cron: '0 0 * * 2' + +jobs: + preflight: + name: "Preflight Check" + runs-on: ubuntu-latest + outputs: + ok: ${{ steps.check.outputs.ok }} + steps: + - id: check + run: | + if [[ ${{ github.event_name }} == 'workflow_dispatch' ]]; then + echo "ok=true" >> $GITHUB_OUTPUT + echo "Running on request" + # using single brackets here to avoid 08 being interpreted as octal + # https://github.com/carpentries/sandpaper/issues/250 + elif [ `date +%d` -le 7 ]; then + # If the Tuesday lands in the first week of the month, run it + echo "ok=true" >> $GITHUB_OUTPUT + echo "Running on schedule" + else + echo "ok=false" >> $GITHUB_OUTPUT + echo "Not Running Today" + fi + + check_renv: + name: "Check if We Need {renv}" + runs-on: ubuntu-latest + needs: preflight + if: ${{ needs.preflight.outputs.ok == 'true'}} + outputs: + needed: ${{ steps.renv.outputs.exists }} + steps: + - name: "Checkout Lesson" + uses: actions/checkout@v3 + - id: renv + run: | + if [[ -d renv ]]; then + echo "exists=true" >> $GITHUB_OUTPUT + fi + + check_token: + name: "Check SANDPAPER_WORKFLOW token" + runs-on: ubuntu-latest + needs: check_renv + if: ${{ needs.check_renv.outputs.needed == 'true' }} + outputs: + workflow: ${{ steps.validate.outputs.wf }} + repo: ${{ steps.validate.outputs.repo }} + steps: + - name: "validate token" + id: validate + uses: carpentries/actions/check-valid-credentials@main + with: + token: ${{ secrets.SANDPAPER_WORKFLOW }} + + update_cache: + name: "Update Package Cache" + needs: check_token + if: ${{ needs.check_token.outputs.repo== 'true' }} + runs-on: ubuntu-latest + env: + GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} + RENV_PATHS_ROOT: ~/.local/share/renv/ + steps: + + - name: "Checkout Lesson" + uses: actions/checkout@v3 + + - name: "Set up R" + uses: 
r-lib/actions/setup-r@v2 + with: + use-public-rspm: true + install-r: false + + - name: "Update {renv} deps and determine if a PR is needed" + id: update + uses: carpentries/actions/update-lockfile@main + with: + cache-version: ${{ secrets.CACHE_VERSION }} + + - name: Create Pull Request + id: cpr + if: ${{ steps.update.outputs.n > 0 }} + uses: carpentries/create-pull-request@main + with: + token: ${{ secrets.SANDPAPER_WORKFLOW }} + delete-branch: true + branch: "update/packages" + commit-message: "[actions] update ${{ steps.update.outputs.n }} packages" + title: "Update ${{ steps.update.outputs.n }} packages" + body: | + :robot: This is an automated build + + This will update ${{ steps.update.outputs.n }} packages in your lesson with the following versions: + + ``` + ${{ steps.update.outputs.report }} + ``` + + :stopwatch: In a few minutes, a comment will appear that will show you how the output has changed based on these updates. + + If you want to inspect these changes locally, you can use the following code to check out a new branch: + + ```bash + git fetch origin update/packages + git checkout update/packages + ``` + + - Auto-generated by [create-pull-request][1] on ${{ steps.update.outputs.date }} + + [1]: https://github.com/carpentries/create-pull-request/tree/main + labels: "type: package cache" + draft: false diff --git a/.github/workflows/update-workflows.yaml b/.github/workflows/update-workflows.yaml new file mode 100644 index 000000000..288bcd139 --- /dev/null +++ b/.github/workflows/update-workflows.yaml @@ -0,0 +1,66 @@ +name: "02 Maintain: Update Workflow Files" + +on: + workflow_dispatch: + inputs: + name: + description: 'Who triggered this build (enter github username to tag yourself)?' + required: true + default: 'weekly run' + clean: + description: 'Workflow files/file extensions to clean (no wildcards, enter "" for none)' + required: false + default: '.yaml' + schedule: + # Run every Tuesday + - cron: '0 0 * * 2' + +jobs: + check_token: + name: "Check SANDPAPER_WORKFLOW token" + runs-on: ubuntu-latest + outputs: + workflow: ${{ steps.validate.outputs.wf }} + repo: ${{ steps.validate.outputs.repo }} + steps: + - name: "validate token" + id: validate + uses: carpentries/actions/check-valid-credentials@main + with: + token: ${{ secrets.SANDPAPER_WORKFLOW }} + + update_workflow: + name: "Update Workflow" + runs-on: ubuntu-latest + needs: check_token + if: ${{ needs.check_token.outputs.workflow == 'true' }} + steps: + - name: "Checkout Repository" + uses: actions/checkout@v3 + + - name: Update Workflows + id: update + uses: carpentries/actions/update-workflows@main + with: + clean: ${{ github.event.inputs.clean }} + + - name: Create Pull Request + id: cpr + if: "${{ steps.update.outputs.new }}" + uses: carpentries/create-pull-request@main + with: + token: ${{ secrets.SANDPAPER_WORKFLOW }} + delete-branch: true + branch: "update/workflows" + commit-message: "[actions] update sandpaper workflow to version ${{ steps.update.outputs.new }}" + title: "Update Workflows to Version ${{ steps.update.outputs.new }}" + body: | + :robot: This is an automated build + + Update Workflows from sandpaper version ${{ steps.update.outputs.old }} -> ${{ steps.update.outputs.new }} + + - Auto-generated by [create-pull-request][1] on ${{ steps.update.outputs.date }} + + [1]: https://github.com/carpentries/create-pull-request/tree/main + labels: "type: template and tools" + draft: false diff --git a/.github/workflows/workbench-beta-phase.yml b/.github/workflows/workbench-beta-phase.yml new file 
mode 100644 index 000000000..2faa25d9c --- /dev/null +++ b/.github/workflows/workbench-beta-phase.yml @@ -0,0 +1,60 @@ +name: "Deploy to AWS" + +on: + workflow_run: + workflows: ["01 Build and Deploy Site"] + types: + - completed + workflow_dispatch: + +jobs: + preflight: + name: "Preflight Check" + runs-on: ubuntu-latest + outputs: + ok: ${{ steps.check.outputs.ok }} + folder: ${{ steps.check.outputs.folder }} + steps: + - id: check + run: | + if [[ -z "${{ secrets.DISTRIBUTION }}" || -z "${{ secrets.AWS_ACCESS_KEY_ID }}" || -z "${{ secrets.AWS_SECRET_ACCESS_KEY }}" ]]; then + echo ":information_source: No site configured" >> $GITHUB_STEP_SUMMARY + echo "" >> $GITHUB_STEP_SUMMARY + echo 'To deploy the preview on AWS, you need the `AWS_ACCESS_KEY_ID`, `AWS_SECRET_ACCESS_KEY` and `DISTRIBUTION` secrets set up' >> $GITHUB_STEP_SUMMARY + else + echo "::set-output name=folder::"$(sed -E 's^.+/(.+)^\1^' <<< ${{ github.repository }}) + echo "::set-output name=ok::true" + fi + + full-build: + name: "Deploy to AWS" + needs: [preflight] + if: ${{ needs.preflight.outputs.ok }} + runs-on: ubuntu-latest + steps: + + - name: "Checkout site folder" + uses: actions/checkout@v3 + with: + ref: 'gh-pages' + path: 'source' + + - name: "Deploy to Bucket" + uses: jakejarvis/s3-sync-action@v0.5.1 + with: + args: --acl public-read --follow-symlinks --delete --exclude '.git/*' + env: + AWS_S3_BUCKET: preview.carpentries.org + AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }} + AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }} + SOURCE_DIR: 'source' + DEST_DIR: ${{ needs.preflight.outputs.folder }} + + - name: "Invalidate CloudFront" + uses: chetan/invalidate-cloudfront-action@master + env: + PATHS: /* + AWS_REGION: 'us-east-1' + DISTRIBUTION: ${{ secrets.DISTRIBUTION }} + AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }} + AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }} diff --git a/.gitignore b/.gitignore index c9a012a6e..9b48956b0 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,42 @@ +# sandpaper files +episodes/*html +site/* +!site/README.md + +# History files +.Rhistory +.Rapp.history +# Session Data files +.RData +# User-specific files +.Ruserdata +# Example code in package build process +*-Ex.R +# Output files from R CMD build +/*.tar.gz +# Output files from R CMD check +/*.Rcheck/ +# RStudio files +.Rproj.user/ +# produced vignettes +vignettes/*.html +vignettes/*.pdf +# OAuth2 token, see https://github.com/hadley/httr/releases/tag/v0.3 +.httr-oauth +# knitr and R markdown default cache directories +*_cache/ +/cache/ +# Temporary files created by R markdown +*.utf8.md +*.knit.md +# R Environment Variables +.Renviron +# pkgdown site +docs/ +# translation temp files +po/*~ +# renv detritus +renv/sandbox/ *.pyc *~ .DS_Store @@ -6,10 +45,6 @@ __pycache__ _site .Rproj.user -.Rhistory -.RData - /Gemfile.lock - vendor .bundle diff --git a/CODE_OF_CONDUCT.md b/CODE_OF_CONDUCT.md index c3b966907..f19b80495 100644 --- a/CODE_OF_CONDUCT.md +++ b/CODE_OF_CONDUCT.md @@ -1,11 +1,13 @@ --- -layout: page title: "Contributor Code of Conduct" --- + As contributors and maintainers of this project, -we pledge to follow the [Carpentry Code of Conduct][coc]. +we pledge to follow the [The Carpentries Code of Conduct][coc]. Instances of abusive, harassing, or otherwise unacceptable behavior may be reported by following our [reporting guidelines][coc-reporting]. 
-{% include links.md %} + +[coc-reporting]: https://docs.carpentries.org/topic_folders/policies/incident-reporting.html +[coc]: https://docs.carpentries.org/topic_folders/policies/code-of-conduct.html diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index be24ca482..6c2b81c84 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -1,151 +1,123 @@ -# Contributing - -[The Carpentries][c-site] ([Software Carpentry][swc-site], [Data Carpentry][dc-site], and [Library Carpentry][lc-site]) are open source projects, -and we welcome contributions of all kinds: -new lessons, -fixes to existing material, -bug reports, -and reviews of proposed changes are all welcome. - -## Contributor Agreement - -By contributing, -you agree that we may redistribute your work under [our license](LICENSE.md). -In exchange, -we will address your issues and/or assess your change proposal as promptly as we can, -and help you become a member of our community. -Everyone involved in [The Carpentries][c-site] -agrees to abide by our [code of conduct](CODE_OF_CONDUCT.md). - -## How to Contribute - -The easiest way to get started is to file an issue -to tell us about a spelling mistake, -some awkward wording, -or a factual error. -This is a good way to introduce yourself -and to meet some of our community members. - -1. If you do not have a [GitHub][github] account, - you can [send us comments by email][email]. - However, - we will be able to respond more quickly if you use one of the other methods described below. - -2. If you have a [GitHub][github] account, - or are willing to [create one][github-join], - but do not know how to use Git, - you can report problems or suggest improvements by [creating an issue][issues]. - This allows us to assign the item to someone - and to respond to it in a threaded discussion. - -3. If you are comfortable with Git, - and would like to add or change material, - you can submit a pull request (PR). - Instructions for doing this are [included below](#using-github). - -## Where to Contribute - -1. If you wish to change this lesson, - please work in , - which can be viewed at . - -2. If you wish to change the example lesson, - please work in , - which documents the format of our lessons - and can be viewed at . - -3. If you wish to change the template used for workshop websites, - please work in . - The home page of that repository explains how to set up workshop websites, - while the extra pages in - provide more background on our design choices. - -4. If you wish to change CSS style files, tools, - or HTML boilerplate for lessons or workshops stored in `_includes` or `_layouts`, - please work in . - -## What to Contribute - -There are many ways to contribute, -from writing new exercises and improving existing ones -to updating or filling in the documentation -and submitting [bug reports][issues] -about things that don't work, aren't clear, or are missing. -If you are looking for ideas, please see the 'Issues' tab for -a list of issues associated with this repository, -or you may also look at the issues for [Data Carpentry][dc-issues], -[Software Carpentry][swc-issues], and [Library Carpentry][lc-issues] projects. - -Comments on issues and reviews of pull requests are just as welcome: -we are smarter together than we are on our own. -Reviews from novices and newcomers are particularly valuable: -it's easy for people who have been using these lessons for a while -to forget how impenetrable some of this material can be, -so fresh eyes are always welcome. 
- -## What *Not* to Contribute - -Our lessons already contain more material than we can cover in a typical workshop, -so we are usually *not* looking for more concepts or tools to add to them. -As a rule, -if you want to introduce a new idea, -you must (a) estimate how long it will take to teach -and (b) explain what you would take out to make room for it. -The first encourages contributors to be honest about requirements; -the second, to think hard about priorities. - -We are also not looking for exercises or other material that only run on one platform. -Our workshops typically contain a mixture of Windows, macOS, and Linux users; -in order to be usable, -our lessons must run equally well on all three. - -## Using GitHub - -If you choose to contribute via GitHub, you may want to look at -[How to Contribute to an Open Source Project on GitHub][how-contribute]. -To manage changes, we follow [GitHub flow][github-flow]. -Each lesson has two maintainers who review issues and pull requests or encourage others to do so. -The maintainers are community volunteers and have final say over what gets merged into the lesson. -To use the web interface for contributing to a lesson: - -1. Fork the originating repository to your GitHub profile. -2. Within your version of the forked repository, move to the `gh-pages` branch and -create a new branch for each significant change being made. -3. Navigate to the file(s) you wish to change within the new branches and make revisions as required. -4. Commit all changed files within the appropriate branches. -5. Create individual pull requests from each of your changed branches -to the `gh-pages` branch within the originating repository. -6. If you receive feedback, make changes using your issue-specific branches of the forked -repository and the pull requests will update automatically. -7. Repeat as needed until all feedback has been addressed. - -When starting work, please make sure your clone of the originating `gh-pages` branch is up-to-date -before creating your own revision-specific branch(es) from there. -Additionally, please only work from your newly-created branch(es) and *not* -your clone of the originating `gh-pages` branch. -Lastly, published copies of all the lessons are available in the `gh-pages` branch of the originating -repository for reference while revising. - -## Other Resources - -General discussion of [Software Carpentry][swc-site] and [Data Carpentry][dc-site] -happens on the [discussion mailing list][discuss-list], -which everyone is welcome to join. -You can also [reach us by email][email]. - -[email]: mailto:admin@software-carpentry.org +## Contributing + +[The Carpentries][cp-site] ([Software Carpentry][swc-site], [Data +Carpentry][dc-site], and [Library Carpentry][lc-site]) are open source +projects, and we welcome contributions of all kinds: new lessons, fixes to +existing material, bug reports, and reviews of proposed changes are all +welcome. + +### Contributor Agreement + +By contributing, you agree that we may redistribute your work under [our +license](LICENSE.md). In exchange, we will address your issues and/or assess +your change proposal as promptly as we can, and help you become a member of our +community. Everyone involved in [The Carpentries][cp-site] agrees to abide by +our [code of conduct](CODE_OF_CONDUCT.md). + +### How to Contribute + +The easiest way to get started is to file an issue to tell us about a spelling +mistake, some awkward wording, or a factual error. 
This is a good way to +introduce yourself and to meet some of our community members. + +1. If you do not have a [GitHub][github] account, you can [send us comments by + email][contact]. However, we will be able to respond more quickly if you use + one of the other methods described below. + +2. If you have a [GitHub][github] account, or are willing to [create + one][github-join], but do not know how to use Git, you can report problems + or suggest improvements by [creating an issue][repo-issues]. This allows us + to assign the item to someone and to respond to it in a threaded discussion. + +3. If you are comfortable with Git, and would like to add or change material, + you can submit a pull request (PR). Instructions for doing this are + [included below](#using-github). For inspiration about changes that need to + be made, check out the [list of open issues][issues] across the Carpentries. + +Note: if you want to build the website locally, please refer to [The Workbench +documentation][template-doc]. + +### Where to Contribute + +1. If you wish to change this lesson, add issues and pull requests here. +2. If you wish to change the template used for workshop websites, please refer + to [The Workbench documentation][template-doc]. + + +### What to Contribute + +There are many ways to contribute, from writing new exercises and improving +existing ones to updating or filling in the documentation and submitting [bug +reports][issues] about things that do not work, are not clear, or are missing. +If you are looking for ideas, please see [the list of issues for this +repository][repo-issues], or the issues for [Data Carpentry][dc-issues], +[Library Carpentry][lc-issues], and [Software Carpentry][swc-issues] projects. + +Comments on issues and reviews of pull requests are just as welcome: we are +smarter together than we are on our own. **Reviews from novices and newcomers +are particularly valuable**: it's easy for people who have been using these +lessons for a while to forget how impenetrable some of this material can be, so +fresh eyes are always welcome. + +### What *Not* to Contribute + +Our lessons already contain more material than we can cover in a typical +workshop, so we are usually *not* looking for more concepts or tools to add to +them. As a rule, if you want to introduce a new idea, you must (a) estimate how +long it will take to teach and (b) explain what you would take out to make room +for it. The first encourages contributors to be honest about requirements; the +second, to think hard about priorities. + +We are also not looking for exercises or other material that only run on one +platform. Our workshops typically contain a mixture of Windows, macOS, and +Linux users; in order to be usable, our lessons must run equally well on all +three. + +### Using GitHub + +If you choose to contribute via GitHub, you may want to look at [How to +Contribute to an Open Source Project on GitHub][how-contribute]. In brief, we +use [GitHub flow][github-flow] to manage changes: + +1. Create a new branch in your desktop copy of this repository for each + significant change. +2. Commit the change in that branch. +3. Push that branch to your fork of this repository on GitHub. +4. Submit a pull request from that branch to the [upstream repository][repo]. +5. If you receive feedback, make changes on your desktop and push to your + branch on GitHub: the pull request will update automatically. + +NB: The published copy of the lesson is usually in the `main` branch. 
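+
+For contributors newer to Git, a minimal sketch of that flow on the command line might look like the following; the branch name, file, and commit message are placeholders to replace with your own:
+
+```bash
+# 1. create a new branch in your local clone of your fork
+git checkout -b fix/typo-in-introduction
+
+# ...edit the lesson files...
+
+# 2. commit the change in that branch
+git add episodes/introduction.md
+git commit -m "Fix typo in the introduction episode"
+
+# 3. push the branch to your fork on GitHub
+git push -u origin fix/typo-in-introduction
+
+# 4. open a pull request from that branch to the upstream repository on github.com
+# 5. if you receive feedback, commit and push to the same branch and the
+#    pull request will update automatically
+```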
+ +Each lesson has a team of maintainers who review issues and pull requests or +encourage others to do so. The maintainers are community volunteers, and have +final say over what gets merged into the lesson. + +### Other Resources + +The Carpentries is a global organisation with volunteers and learners all over +the world. We share values of inclusivity and a passion for sharing knowledge, +teaching and learning. There are several ways to connect with The Carpentries +community listed at including via social +media, slack, newsletters, and email lists. You can also [reach us by +email][contact]. + +[repo]: https://example.com/FIXME +[repo-issues]: https://example.com/FIXME/issues +[contact]: mailto:team@carpentries.org +[cp-site]: https://carpentries.org/ [dc-issues]: https://github.com/issues?q=user%3Adatacarpentry -[dc-lessons]: http://datacarpentry.org/lessons/ -[dc-site]: http://datacarpentry.org/ -[discuss-list]: http://lists.software-carpentry.org/listinfo/discuss +[dc-lessons]: https://datacarpentry.org/lessons/ +[dc-site]: https://datacarpentry.org/ +[discuss-list]: https://carpentries.topicbox.com/groups/discuss [github]: https://github.com [github-flow]: https://guides.github.com/introduction/flow/ [github-join]: https://github.com/join -[how-contribute]: https://egghead.io/series/how-to-contribute-to-an-open-source-project-on-github -[issues]: https://guides.github.com/features/issues/ +[how-contribute]: https://egghead.io/courses/how-to-contribute-to-an-open-source-project-on-github +[issues]: https://carpentries.org/help-wanted-issues/ +[lc-issues]: https://github.com/issues?q=user%3ALibraryCarpentry [swc-issues]: https://github.com/issues?q=user%3Aswcarpentry [swc-lessons]: https://software-carpentry.org/lessons/ [swc-site]: https://software-carpentry.org/ -[c-site]: https://carpentries.org/ [lc-site]: https://librarycarpentry.org/ -[lc-issues]: https://github.com/issues?q=user%3Alibrarycarpentry +[template-doc]: https://carpentries.github.io/workbench/ diff --git a/LICENSE.md b/LICENSE.md index 41c0494ed..7632871ff 100644 --- a/LICENSE.md +++ b/LICENSE.md @@ -1,82 +1,79 @@ --- -layout: page title: "Licenses" -root: . --- + ## Instructional Material -All Software Carpentry, Data Carpentry, and Library Carpentry instructional material is -made available under the [Creative Commons Attribution -license][cc-by-human]. The following is a human-readable summary of +All Carpentries (Software Carpentry, Data Carpentry, and Library Carpentry) +instructional material is made available under the [Creative Commons +Attribution license][cc-by-human]. The following is a human-readable summary of (and not a substitute for) the [full legal text of the CC BY 4.0 license][cc-by-legal]. You are free: -* to **Share**---copy and redistribute the material in any medium or format -* to **Adapt**---remix, transform, and build upon the material +- to **Share**---copy and redistribute the material in any medium or format +- to **Adapt**---remix, transform, and build upon the material for any purpose, even commercially. -The licensor cannot revoke these freedoms as long as you follow the -license terms. +The licensor cannot revoke these freedoms as long as you follow the license +terms. Under the following terms: -* **Attribution**---You must give appropriate credit (mentioning that - your work is derived from work that is Copyright © D. M. Eyers, S. L. R. - Stevens, A. Turner, C. Koch and J. 
Cohen and, where practical, linking to - https://github.com/carpentries-incubator/docker-introduction), provide a - [link to the license][cc-by-human], and indicate if changes were made. You - may do so in any reasonable manner, but not in any way that suggests the - licensor endorses you or your use. +- **Attribution**---You must give appropriate credit (mentioning that your work + is derived from work that is Copyright (c) The Carpentries and, where + practical, linking to ), provide a [link to the + license][cc-by-human], and indicate if changes were made. You may do so in + any reasonable manner, but not in any way that suggests the licensor endorses + you or your use. -**No additional restrictions**---You may not apply legal terms or -technological measures that legally restrict others from doing -anything the license permits. With the understanding that: +- **No additional restrictions**---You may not apply legal terms or + technological measures that legally restrict others from doing anything the + license permits. With the understanding that: Notices: -* You do not have to comply with the license for elements of the - material in the public domain or where your use is permitted by an - applicable exception or limitation. -* No warranties are given. The license may not give you all of the - permissions necessary for your intended use. For example, other - rights such as publicity, privacy, or moral rights may limit how you - use the material. +* You do not have to comply with the license for elements of the material in + the public domain or where your use is permitted by an applicable exception + or limitation. +* No warranties are given. The license may not give you all of the permissions + necessary for your intended use. For example, other rights such as publicity, + privacy, or moral rights may limit how you use the material. ## Software -Except where otherwise noted, the example programs and other software -provided in this lesson are made available under the [OSI][osi]-approved -[MIT license][mit-license]. - -Permission is hereby granted, free of charge, to any person obtaining -a copy of this software and associated documentation files (the -"Software"), to deal in the Software without restriction, including -without limitation the rights to use, copy, modify, merge, publish, -distribute, sublicense, and/or sell copies of the Software, and to -permit persons to whom the Software is furnished to do so, subject to -the following conditions: - -The above copyright notice and this permission notice shall be -included in all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF -MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND -NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE -LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION -OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION -WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +Except where otherwise noted, the example programs and other software provided +by The Carpentries are made available under the [OSI][osi]-approved [MIT +license][mit-license]. 
+ +Permission is hereby granted, free of charge, to any person obtaining a copy of +this software and associated documentation files (the "Software"), to deal in +the Software without restriction, including without limitation the rights to +use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies +of the Software, and to permit persons to whom the Software is furnished to do +so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. ## Trademark -"Software Carpentry" and "Data Carpentry" and their respective logos -are registered trademarks of [Community Initiatives][CI]. +"The Carpentries", "Software Carpentry", "Data Carpentry", and "Library +Carpentry" and their respective logos are registered trademarks of [Community +Initiatives][ci]. [cc-by-human]: https://creativecommons.org/licenses/by/4.0/ [cc-by-legal]: https://creativecommons.org/licenses/by/4.0/legalcode [mit-license]: https://opensource.org/licenses/mit-license.html -[ci]: http://communityin.org/ +[ci]: https://communityin.org/ [osi]: https://opensource.org diff --git a/README.md b/README.md index 107d08ac4..ba1a42c5c 100644 --- a/README.md +++ b/README.md @@ -1,14 +1,20 @@ +> **ATTENTION** This is an experimental test of [The Carpentries Workbench](https://carpentries.github.io/workbench) lesson infrastructure. +> It was automatically converted from the source lesson via [the lesson transition script](https://github.com/carpentries/lesson-transition/). +> +> If anything seems off, please contact Zhian Kamvar [zkamvar@carpentries.org](mailto:zkamvar@carpentries.org) + [![Gitpod ready-to-code](https://img.shields.io/badge/Gitpod-ready--to--code-blue?logo=gitpod)](https://gitpod.io/#https://github.com/carpentries-incubator/docker-introduction) -[![The Carpentries Lab Review Status](http://badges.carpentries-lab.org/15_status.svg)](https://github.com/carpentries-lab/reviews/issues/15) +[![The Carpentries Lab Review Status](https://badges.carpentries-lab.org/15_status.svg)](https://github.com/carpentries-lab/reviews/issues/15) # Reproducible computational environments using containers: Introduction to Docker [![Create a Slack Account with us](https://img.shields.io/badge/Create_Slack_Account-The_Carpentries-071159.svg)](https://swc-slack-invite.herokuapp.com/) -This repository generates the corresponding lesson website from [The Carpentries](https://carpentries.org/) repertoire of lessons. +This repository generates the corresponding lesson website from [The Carpentries](https://carpentries.org/) repertoire of lessons. 
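+
+If you would like to preview the generated website on your own machine, one possible route -- a sketch that assumes R and the {sandpaper} package are already installed (see [The Workbench documentation](https://carpentries.github.io/workbench/) for the supported setup) -- is:
+
+```bash
+# Build the lesson locally and serve a preview in your browser
+Rscript -e 'sandpaper::serve()'
+```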
+ +If you are interested in Singularity as opposed to Docker, see the Singularity lesson in the Carpentries Incubator: -If you are interested in Singularity as opposed to Docker, see the Singularity lesson in the Carpentries Incubator: -* [Reproducible Computational Environments Using Containers: Introduction to Singularity](https://github.com/carpentries-incubator/singularity-introduction) +- [Reproducible Computational Environments Using Containers: Introduction to Singularity](https://github.com/carpentries-incubator/singularity-introduction) ## Contributing @@ -21,21 +27,19 @@ how to write new episodes. Please see the current list of [issues](https://github.com/carpentries-incubator/docker-introduction/issues) for ideas for contributing to this repository. For making your contribution, we use the GitHub flow, which is -nicely explained in the chapter [Contributing to a Project](http://git-scm.com/book/en/v2/GitHub-Contributing-to-a-Project) in Pro Git +nicely explained in the chapter [Contributing to a Project](https://git-scm.com/book/en/v2/GitHub-Contributing-to-a-Project) in Pro Git by Scott Chacon. -Look for the tag ![good_first_issue](https://img.shields.io/badge/-good%20first%20issue-gold.svg). This indicates that the mantainers will welcome a pull request fixing this issue. - +Look for the tag ![good\_first\_issue](https://img.shields.io/badge/-good%20first%20issue-gold.svg). This indicates that the mantainers will welcome a pull request fixing this issue. ## Maintainer(s) -Current maintainers of this lesson are - -* [Jeremy Cohen](https://github.com/jcohen02) -* [David Eyers](https://github.com/dme26/) -* [Christina Koch](https://github.com/ChristinaLK) -* [Andy Turner](https://github.com/aturner-epcc) -* [Sarah Stevens](https://github.com/sstevens2/) - Send emails with questions here +Current maintainers of this lesson are +- [Jeremy Cohen](https://github.com/jcohen02) +- [David Eyers](https://github.com/dme26/) +- [Christina Koch](https://github.com/ChristinaLK) +- [Andy Turner](https://github.com/aturner-epcc) +- [Sarah Stevens](https://github.com/sstevens2/) - Send emails with questions here ## Authors @@ -46,3 +50,6 @@ A list of contributors to the lesson can be found in [AUTHORS](AUTHORS) To cite this lesson, please consult with [CITATION](CITATION) [lesson-example]: https://carpentries.github.io/lesson-example + + + diff --git a/_extras/figures.md b/_extras/figures.md deleted file mode 100644 index ee5b65082..000000000 --- a/_extras/figures.md +++ /dev/null @@ -1,68 +0,0 @@ ---- -title: Figures ---- - -{% include base_path.html %} - - -{% comment %} -Create anchor for each one of the episodes. -{% endcomment %} -{% for episode in site.episodes %} -
-{% endfor %} - -{% include links.md %} diff --git a/config.yaml b/config.yaml new file mode 100644 index 000000000..022ae0c0e --- /dev/null +++ b/config.yaml @@ -0,0 +1,90 @@ +#------------------------------------------------------------ +# Values for this lesson. +#------------------------------------------------------------ + +# Which carpentry is this (swc, dc, lc, or cp)? +# swc: Software Carpentry +# dc: Data Carpentry +# lc: Library Carpentry +# cp: Carpentries (to use for instructor training for instance) +# incubator: The Carpentries Incubator +carpentry: 'incubator' + +# Overall title for pages. +title: 'Reproducible Computational Environments Using Containers: Introduction to Docker' + +# Date the lesson was created (YYYY-MM-DD, this is empty by default) +created: + +# Comma-separated list of keywords for the lesson +keywords: 'software, data, lesson, The Carpentries' + +# Life cycle stage of the lesson +# possible values: pre-alpha, alpha, beta, stable +life_cycle: 'beta' + +# License of the lesson materials (recommended CC-BY 4.0) +license: 'CC-BY 4.0' + +# Link to the source repository for this lesson +source: 'https://github.com/fishtree-attempt/docker-introduction/' + +# Default branch of your lesson +branch: 'main' + +# Who to contact if there are any issues +contact: 'docker-introduction+admins@lists.carpentries.org' + +# Navigation ------------------------------------------------ +# +# Use the following menu items to specify the order of +# individual pages in each dropdown section. Leave blank to +# include all pages in the folder. +# +# Example ------------- +# +# episodes: +# - introduction.md +# - first-steps.md +# +# learners: +# - setup.md +# +# instructors: +# - instructor-notes.md +# +# profiles: +# - one-learner.md +# - another-learner.md + +# Order of episodes in your lesson +episodes: +- introduction.md +- meet-docker.md +- running-containers.md +- managing-containers.md +- docker-hub.md +- creating-container-images.md +- advanced-containers.md +- docker-image-examples.md +- reproduciblity.md + +# Information for Learners +learners: + +# Information for Instructors +instructors: + +# Learner Profiles +profiles: + +# Customisation --------------------------------------------- +# +# This space below is where custom yaml items (e.g. pinning +# sandpaper and varnish versions) should live + + +url: https://preview.carpentries.org/docker-introduction +analytics: carpentries +lang: en +workbench-beta: yes diff --git a/episodes/advanced-containers.md b/episodes/advanced-containers.md new file mode 100644 index 000000000..e4ed9d663 --- /dev/null +++ b/episodes/advanced-containers.md @@ -0,0 +1,505 @@ +--- +title: Creating More Complex Container Images +teaching: 30 +exercises: 30 +--- + +::::::::::::::::::::::::::::::::::::::: objectives + +- Explain how you can include files within Docker container images when you build them. +- Explain how you can access files on the Docker host from your Docker containers. + +:::::::::::::::::::::::::::::::::::::::::::::::::: + +:::::::::::::::::::::::::::::::::::::::: questions + +- How can I make more complex container images? + +:::::::::::::::::::::::::::::::::::::::::::::::::: + +In order to create and use your own container images, you may need more information than +our previous example. 
You may want to use files from outside the container, +that are not included within the container image, either by copying the files +into the container image, or by making them visible within a running container from their +existing location on your host system. You may also want to learn a little bit +about how to install software within a running container or a container image. +This episode will look at these advanced aspects of running a container or building +a container image. Note that the examples will get gradually +more and more complex -- most day-to-day use of containers and container images can be accomplished +using the first 1--2 sections on this page. + +## Using scripts and files from outside the container + +In your shell, change to the `sum` folder in the `docker-intro` folder and look at +the files inside. + +```bash +$ cd ~/Desktop/docker-intro/sum +$ ls +``` + +This folder has both a `Dockerfile` and a Python script called `sum.py`. Let's say +we wanted to try running the script using a container based on our recently created `alpine-python` +container image. + +::::::::::::::::::::::::::::::::::::::: challenge + +## Running containers + +What command would we use to run Python from the `alpine-python` container? + + +:::::::::::::::::::::::::::::::::::::::::::::::::: + +If we try running the container and Python script, what happens? + +```bash +$ docker container run alice/alpine-python python3 sum.py +``` + +```output +python3: can't open file '//sum.py': [Errno 2] No such file or directory +``` + +::::::::::::::::::::::::::::::::::::::: challenge + +## No such file or directory + +What does the error message mean? Why might the Python inside the container +not be able to find or open our script? + +:::::::::::::::::::::::::::::::::::::::::::::::::: + +The problem here is that the container and its filesystem is separate from our +host computer's filesystem. When the container runs, it can't see anything outside +itself, including any of the files on our computer. In order to use Python +(inside the container) and our script (outside the container, on our host computer), +we need to create a link between the directory on our computer and the container. + +This link is called a "mount" and is what happens automatically when a USB drive +or other external hard drive gets connected to a computer -- you can see the +contents appear as if they were on your computer. + +We can create a mount between our computer and the running container by using an additional +option to `docker container run`. We'll also use the variable `${PWD}` which will substitute +in our current working directory. The option will look like this + +`--mount type=bind,source=${PWD},target=/temp` + +What this means is: make my current working directory (on the host computer) -- the source -- +*visible* within the container that is about to be started, and inside this container, name the +directory `/temp` -- the target. + +::::::::::::::::::::::::::::::::::::::::: callout + +## Types of mounts + +You will notice that we set the mount `type=bind`, there are other types of mount that +can be used in Docker (e.g. `volume` and `tmpfs`). We do not cover other types of mounts +or the differences between these mount types in the course as it is more of an advanced +topic. You can find more information on the different mount types in +[the Docker documentation](https://docs.docker.com/storage/). 
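+
+As an aside -- you will not need this for the lesson -- a Docker-managed *volume* mount,
+where Docker itself stores the data rather than binding to a folder on your host, might look
+something like this (illustrative example only; the volume name `mydata` is arbitrary):
+
+```bash
+# create a named volume that Docker manages for us
+$ docker volume create mydata
+
+# mount that volume at /data inside a container and list its (empty) contents
+$ docker container run --mount type=volume,source=mydata,target=/data alpine ls /data
+```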
+ + +:::::::::::::::::::::::::::::::::::::::::::::::::: + +Let's try running the command now: + +```bash +$ docker container run --mount type=bind,source=${PWD},target=/temp alice/alpine-python python3 sum.py +``` + +But we get the same error! + +```output +python3: can't open file '//sum.py': [Errno 2] No such file or directory +``` + +This final piece is a bit tricky -- we really have to remember to put ourselves +inside the container. Where is the `sum.py` file? It's in the directory that's been +mapped to `/temp` -- so we need to include that in the path to the script. This +command should give us what we need: + +```bash +$ docker container run --mount type=bind,source=${PWD},target=/temp alice/alpine-python python3 /temp/sum.py +``` + +Note that if we create any files in the `/temp` directory while the container is +running, these files will appear on our host filesystem in the original directory +and will stay there even when the container stops. + +::::::::::::::::::::::::::::::::::::::::: callout + +## Other Commonly Used Docker Run Flags + +Docker run has many other useful flags to alter its function. +A couple that are commonly used include `-w` and `-u`. + +The `--workdir`/`-w` flag sets the working directory a.k.a. runs the command +being executed inside the directory specified. +For example, the following code would run the `pwd` command in a container +started from the latest ubuntu image in the `/home/alice` directory and print +`/home/alice`. If the directory doesn't exist in the image it will create it. + +``` +docker container run -w /home/alice/ ubuntu pwd +``` + +The `--user`/`-u` flag lets you specify the username you would like to run the +container as. This is helpful if you'd like to write files to a mounted folder +and not write them as `root` but rather your own user identity and group. +A common example of the `-u` flag is `--user $(id -u):$(id -g)` which will +fetch the current user's ID and group and run the container as that user. + +:::::::::::::::::::::::::::::::::::::::::::::::::: + +::::::::::::::::::::::::::::::::::::::: challenge + +## Exercise: Explore the script + +What happens if you use the `docker container run` command above +and put numbers after the script name? + +::::::::::::::: solution + +## Solution + +This script comes from [the Python Wiki](https://wiki.python.org/moin/SimplePrograms) +and is set to add all numbers +that are passed to it as arguments. + + + +::::::::::::::::::::::::: + +:::::::::::::::::::::::::::::::::::::::::::::::::: + +::::::::::::::::::::::::::::::::::::::: challenge + +## Exercise: Checking the options + +Our Docker command has gotten much longer! Can you go through each piece of +the Docker command above and explain what it does? How would you characterize +the key components of a Docker command? 
+ +::::::::::::::: solution + +## Solution + +Here's a breakdown of each piece of the command above + +- `docker container run`: use Docker to run a container +- `--mount type=bind,source=${PWD},target=/temp`: connect my current working directory (`${PWD}`) as a folder + inside the container called `/temp` +- `alice/alpine-python`: name of the container image to use to run the container +- `python3 /temp/sum.py`: what commands to run in the container + +More generally, every Docker command will have the form: +`docker [action] [docker options] [docker container image] [command to run inside]` + +::::::::::::::::::::::::: + +:::::::::::::::::::::::::::::::::::::::::::::::::: + +::::::::::::::::::::::::::::::::::::::: challenge + +## Exercise: Interactive jobs + +Try using the directory mount option but run the container interactively. +Can you find the folder that's connected to your host computer? What's inside? + +::::::::::::::: solution + +## Solution + +The docker command to run the container interactively is: + +```bash +$ docker container run --mount type=bind,source=${PWD},target=/temp -it alice/alpine-python sh +``` + +Once inside, you should be able to navigate to the `/temp` folder +and see that its contents are the same as the files on your host computer: + +```bash +/# cd /temp +/# ls +``` + +::::::::::::::::::::::::: + +:::::::::::::::::::::::::::::::::::::::::::::::::: + +Mounting a directory can be very useful when you want to run the software inside your container on many different input files. +In other situations, you may want to save or archive an authoritative version of your data by adding it to the container image permanently. That's what we will cover next. + +## Including your scripts and data within a container image + +Our next project will be to add our own files to a container image -- something you +might want to do if you're sharing a finished analysis or just want to have +an archived copy of your entire analysis including the data. Let's assume that we've finished with our `sum.py` +script and want to add it to the container image itself. + +In your shell, you should still be in the `sum` folder in the `docker-intro` folder. + +```bash +$ pwd +``` + +```output +/Users/yourname/Desktop/docker-intro/sum +``` + +Let's add a new line to the `Dockerfile` we've been using so far to create a copy of `sum.py`. +We can do so by using the `COPY` keyword. + +``` +COPY sum.py /home +``` + +This line will cause Docker to copy the file from your computer into the container's +filesystem. Let's build the container image like before, but give it a different name: + +```bash +$ docker image build -t alice/alpine-sum . +``` + +::::::::::::::::::::::::::::::::::::::::: callout + +## The Importance of Command Order in a Dockerfile + +When you run `docker image build` it executes the build in the order specified +in the `Dockerfile`. +This order is important for rebuilding and you typically will want to put your `RUN` +commands before your `COPY` commands. + +Docker builds the layers of commands in order. +This becomes important when you need to rebuild container images. +If you change layers later in the `Dockerfile` and rebuild the container image, Docker doesn't need to +rebuild the earlier layers but will instead use a stored (called "cached") version of +those layers. + +For example, imagine that you wanted to copy `multiply.py` into the container +image instead of `sum.py`. +If the `COPY` line came before the `RUN` line, it would need to rebuild the whole image. 
+If the `COPY` line came second then it would use the cached `RUN` layer from the previous +build and then only rebuild the `COPY` layer. + +:::::::::::::::::::::::::::::::::::::::::::::::::: + +::::::::::::::::::::::::::::::::::::::: challenge + +## Exercise: Did it work? + +Can you remember how to run a container interactively? Try that with this one. +Once inside, try running the Python script. + +::::::::::::::: solution + +## Solution + +You can start the container interactively like so: + +```bash +$ docker container run -it alice/alpine-sum sh +``` + +You should be able to run the python command inside the container like this: + +```bash +/# python3 /home/sum.py +``` + +::::::::::::::::::::::::: + +:::::::::::::::::::::::::::::::::::::::::::::::::: + +This `COPY` keyword can be used to place your own scripts or own data into a container image +that you want to publish or use as a record. Note that it's not necessarily a good idea +to put your scripts inside the container image if you're constantly changing or editing them. +Then, referencing the scripts from outside the container is a good idea, as we +did in the previous section. You also want to think carefully about size -- if you +run `docker image ls` you'll see the size of each container image all the way on the right of +the screen. The bigger your container image becomes, the harder it will be to easily download. + +::::::::::::::::::::::::::::::::::::::::: callout + +## Security Warning + +Login credentials including passwords, tokens, secure access tokens or other secrets +must never be stored in a container. If secrets are stored, they are at high risk to +be found and exploited when made public. + + +:::::::::::::::::::::::::::::::::::::::::::::::::: + +::::::::::::::::::::::::::::::::::::::::: callout + +## Copying alternatives + +Another trick for getting your own files into a container image is by using the `RUN` +keyword and downloading the files from the internet. For example, if your code +is in a GitHub repository, you could include this statement in your Dockerfile +to download the latest version every time you build the container image: + +``` +RUN git clone https://github.com/alice/mycode +``` + +Similarly, the `wget` command can be used to download any file publicly available +on the internet: + +``` +RUN wget ftp://ftp.ncbi.nlm.nih.gov/blast/executables/blast+/2.10.0/ncbi-blast-2.10.0+-x64-linux.tar.gz +``` + +Note that the above `RUN` examples depend on commands (`git` and `wget` respectively) that +must be available within your container: Linux distributions such as Alpine may require you to +install such commands before using them within `RUN` statements. + + +:::::::::::::::::::::::::::::::::::::::::::::::::: + +## More fancy `Dockerfile` options (optional, for presentation or as exercises) + +We can expand on the example above to make our container image even more "automatic". +Here are some ideas: + +### Make the `sum.py` script run automatically + +``` +FROM alpine +RUN apk add --update python3 py3-pip python3-dev +COPY sum.py /home + +# Run the sum.py script as the default command +CMD ["python3", "/home/sum.py"] +``` + +Build and test it: + +```bash +$ docker image build -t alpine-sum:v1 . +$ docker container run alpine-sum:v1 +``` + +You'll notice that you can run the container without arguments just fine, +resulting in `sum = 0`, but this is boring. 
Supplying arguments however +doesn't work: + +```bash +docker container run alpine-sum:v1 10 11 12 +``` + +results in + +```output +docker: Error response from daemon: OCI runtime create failed: +container_linux.go:349: starting container process caused "exec: +\"10\": executable file not found in $PATH": unknown. +``` + +This is because the arguments `10 11 12` are interpreted as a +*command* that replaces the default command given by `CMD ["python3", "/home/sum.py"]` in the image. + +To achieve the goal of having a command that *always* runs when a +container is run from the container image *and* can be passed the arguments given on the +command line, use the keyword `ENTRYPOINT` in the `Dockerfile`. + +``` +FROM alpine + +RUN apk add --update python3 py3-pip python3-dev +COPY sum.py /home + +# Run the sum.py script as the default command and +# allow people to enter arguments for it +ENTRYPOINT ["python3", "/home/sum.py"] + +# Give default arguments, in case none are supplied on +# the command-line +CMD ["10", "11"] +``` + +Build and test it: + +```bash +$ docker image build -t alpine-sum:v2 . +# Most of the time you are interested in the sum of 10 and 11: +$ docker container run alpine-sum:v2 +# Sometimes you have more challenging calculations to do: +$ docker container run alpine-sum:v2 12 13 14 +``` + +::::::::::::::::::::::::::::::::::::::::: callout + +## Overriding the ENTRYPOINT + +Sometimes you don't want to run the +image's `ENTRYPOINT`. For example if you have a specialized container image +that does only sums, but you need an interactive shell to examine +the container: + +```bash +$ docker container run -it alpine-sum:v2 /bin/sh +``` + +will yield + +```output +Please supply integer arguments +``` + +You need to override the `ENTRYPOINT` statement in the container image like so: + +```bash +$ docker container run -it --entrypoint /bin/sh alpine-sum:v2 +``` + +:::::::::::::::::::::::::::::::::::::::::::::::::: + +### Add the `sum.py` script to the `PATH` so you can run it directly: + +``` +FROM alpine + +RUN apk add --update python3 py3-pip python3-dev + +COPY sum.py /home +# set script permissions +RUN chmod +x /home/sum.py +# add /home folder to the PATH +ENV PATH /home:$PATH +``` + +Build and test it: + +```bash +$ docker image build -t alpine-sum:v3 . +$ docker container run alpine-sum:v3 sum.py 1 2 3 4 +``` + +::::::::::::::::::::::::::::::::::::::::: callout + +## Best practices for writing Dockerfiles + +Take a look at Nüst et al.'s "[*Ten simple rules for writing Dockerfiles for reproducible data science*](https://doi.org/10.1371/journal.pcbi.1008316)" [1] +for some great examples of best practices to use when writing Dockerfiles. +The [GitHub repository](https://github.com/nuest/ten-simple-rules-dockerfiles) associated with the paper also has a set of [example `Dockerfile`s](https://github.com/nuest/ten-simple-rules-dockerfiles/tree/master/examples) +demonstrating how the rules highlighted by the paper can be applied. + +[1] Nüst D, Sochat V, Marwick B, Eglen SJ, Head T, et al. (2020) Ten simple rules for writing Dockerfiles for reproducible data science. PLOS Computational Biology 16(11): e1008316. [https://doi.org/10.1371/journal.pcbi.1008316](https://doi.org/10.1371/journal.pcbi.1008316) + + +:::::::::::::::::::::::::::::::::::::::::::::::::: + + + +:::::::::::::::::::::::::::::::::::::::: keypoints + +- Docker allows containers to read and write files from the Docker host. 
+- You can include files from your Docker host into your Docker container images by using the `COPY` instruction in your `Dockerfile`. + +:::::::::::::::::::::::::::::::::::::::::::::::::: + + diff --git a/episodes/creating-container-images.md b/episodes/creating-container-images.md new file mode 100644 index 000000000..a59e3f6f6 --- /dev/null +++ b/episodes/creating-container-images.md @@ -0,0 +1,362 @@ +--- +title: Creating Your Own Container Images +teaching: 20 +exercises: 15 +--- + +::::::::::::::::::::::::::::::::::::::: objectives + +- Explain the purpose of a `Dockerfile` and show some simple examples. +- Demonstrate how to build a Docker container image from a `Dockerfile`. +- Compare the steps of creating a container image interactively versus a `Dockerfile`. +- Create an installation strategy for a container image. +- Demonstrate how to upload ('push') your container images to the Docker Hub. +- Describe the significance of the Docker Hub naming scheme. + +:::::::::::::::::::::::::::::::::::::::::::::::::: + +:::::::::::::::::::::::::::::::::::::::: questions + +- How can I make my own Docker container images? +- How do I document the 'recipe' for a Docker container image? + +:::::::::::::::::::::::::::::::::::::::::::::::::: + +There are lots of reasons why you might want to create your **own** Docker container image. + +- You can't find a container image with all the tools you need on Docker Hub. +- You want to have a container image to "archive" all the specific software versions you ran for a project. +- You want to share your workflow with someone else. + +## Interactive installation + +Before creating a reproducible installation, let's experiment with installing +software inside a container. Start a container from the `alpine` container image we used before, interactively: + +```bash +$ docker container run -it alpine sh +``` + +Because this is a basic container, there are a lot of things not installed -- for +example, `python3`. + +```bash +/# python3 +``` + +```output +sh: python3: not found +``` + +Inside the container, we can run commands to install Python 3. The Alpine version of +Linux has an installation tool called `apk` that we can use to install Python 3. + +```bash +/# apk add --update python3 py3-pip python3-dev +``` + +We can test our installation by running a Python command: + +```bash +/# python3 --version +``` + +::::::::::::::::::::::::::::::::::::::: challenge + +## Exercise: Searching for Help + +Can you find instructions for installing R on Alpine Linux? Do they work? + +::::::::::::::: solution + +## Solution + +A quick search should hopefully show that the way to install R on Alpine Linux is: + +```bash +/# apk add R +``` + +::::::::::::::::::::::::: + +:::::::::::::::::::::::::::::::::::::::::::::::::: + +Once we exit, these changes are not saved to a new container image by default. There is +a command that will "snapshot" our changes, but building container images this way is +not easily reproducible. Instead, we're going to take what we've learned from this +interactive installation and create our container image from a reproducible recipe, +known as a `Dockerfile`. + +If you haven't already, exit out of the interactively running container. + +```bash +/# exit +``` + +## Put installation instructions in a `Dockerfile` + +A `Dockerfile` is a plain text file with keywords and commands that +can be used to create a new container image. 
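+
+For illustration only -- this is not the `Dockerfile` used in this lesson -- a minimal
+`Dockerfile` might look something like this (the base image and package here are arbitrary choices):
+
+```
+# hypothetical example: start from an Ubuntu base image,
+# install one package, and set a default command
+FROM ubuntu:22.04
+RUN apt-get update && apt-get install -y cowsay
+CMD ["/usr/games/cowsay", "Containers!"]
+```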
+ +From your shell, go to the folder you downloaded at the start of the lesson +and print out the Dockerfile inside: + +```bash +$ cd ~/Desktop/docker-intro/basic +$ cat Dockerfile +``` + +```output +FROM +RUN +CMD +``` + +Let's break this file down: + +- The first line, `FROM`, indicates which container image we're starting with. It is the "base" container image we are going to start from. +- The next two lines, `RUN`, will indicate installation commands we want to run. These + are the same commands that we used interactively above. +- The last line, `CMD`, indicates the default command we want a + container based on this container image to run, if no other command is provided. It is recommended + to provide `CMD` in *exec-form* (see the + [`CMD` section](https://docs.docker.com/engine/reference/builder/#cmd) + of the Dockerfile documentation for more details). It is written as a + list which contains the executable to run as its first element, + optionally followed by any arguments as subsequent elements. The list + is enclosed in square brackets (`[]`) and its elements are + double-quoted (`"`) strings which are separated by commas. For + example, `CMD ["ls", "-lF", "--color", "/etc"]` would translate + to `ls -lF --color /etc`. + +::::::::::::::::::::::::::::::::::::::::: callout + +## *shell-form* and *exec-form* for CMD + +Another way to specify the parameter for the +[`CMD` instruction](https://docs.docker.com/engine/reference/builder/#cmd) +is the *shell-form*. Here you type the command as you would call it +from the command line. Docker then silently runs this command in the +image's standard shell. `CMD cat /etc/passwd` is equivalent to `CMD ["/bin/sh", "-c", "cat /etc/passwd"]`. We recommend using the +more explicit *exec-form* because we will be able to create more +flexible container image command options and make sure complex commands +are unambiguous in this format. + + +:::::::::::::::::::::::::::::::::::::::::::::::::: + +::::::::::::::::::::::::::::::::::::::: challenge + +## Exercise: Take a Guess + +Do you have any ideas about what we should use to fill in the sample Dockerfile +to replicate the installation we did above? + +::::::::::::::: solution + +## Solution: + +Based on our experience above, edit the `Dockerfile` (in your text editor of choice) +to look like this: + +``` +FROM alpine +RUN apk add --update python3 py3-pip python3-dev +CMD ["python3", "--version"] +``` + +::::::::::::::::::::::::: + +:::::::::::::::::::::::::::::::::::::::::::::::::: + +The recipe provided by the `Dockerfile` shown in the solution to the preceding exercise will use Alpine Linux as the base container image, +add Python 3 (together with `pip` and the Python development headers), and set a default command to request Python 3 to report its version information. + +## Create a new Docker image + +So far, we only have a text file named `Dockerfile` -- we do not yet have a container image. +We want Docker to take this `Dockerfile`, +run the installation commands contained within it, and then save the +resulting container as a new container image. To do this we will use the +`docker image build` command. + +We have to provide `docker image build` with two pieces of information: + +- the location of the `Dockerfile` +- the name of the new container image. Remember the naming scheme from before? You should name + your new image with your Docker Hub username and a name for the container image, like this: `USERNAME/CONTAINER_IMAGE_NAME`. 
+ +All together, the build command that you should run on your computer, will have a similar structure to this: + +```bash +$ docker image build -t USERNAME/CONTAINER_IMAGE_NAME . +``` + +The `-t` option names the container image; the final dot indicates that the `Dockerfile` is in +our current directory. + +For example, if my user name was `alice` and I wanted to call my +container image `alpine-python`, I would use this command: + +```bash +$ docker image build -t alice/alpine-python . +``` + +::::::::::::::::::::::::::::::::::::::::: callout + +## Build Context + +Notice that the final input to `docker image build` isn't the Dockerfile -- it's +a directory! In the command above, we've used the current working directory (`.`) of +the shell as the final input to the `docker image build` command. This option provides +what is called the *build context* to Docker -- if there are files being copied +into the built container image [more details in the next episode](advanced-containers.md) +they're assumed to be in this location. Docker expects to see a Dockerfile in the +build context also (unless you tell it to look elsewhere). + +Even if it won't need all of the files in the build context directory, Docker does +"load" them before starting to build, which means that it's a good idea to have +only what you need for the container image in a build context directory, as we've done +in this example. + + +:::::::::::::::::::::::::::::::::::::::::::::::::: + +::::::::::::::::::::::::::::::::::::::: challenge + +## Exercise: Review! + +1. Think back to earlier. What command can you run to check if your container image was created + successfully? (Hint: what command shows the container images on your computer?) + +2. We didn't specify a tag for our container image name. What tag did Docker automatically use? + +3. What command will run a container based on the container image you've created? What should happen by default + if you run such a container? Can you make it do something different, like print + "hello world"? + +::::::::::::::: solution + +## Solution + +1. To see your new image, run `docker image ls`. You should see the name of your new + container image under the "REPOSITORY" heading. + +2. In the output of `docker image ls`, you can see that Docker has automatically + used the `latest` tag for our new container image. + +3. We want to use `docker container run` to run a container based on a container image. + +The following command should run a container and print out our default message, the version +of Python: + +```bash +$ docker container run alice/alpine-python +``` + +To run a container based on our container image and print out "Hello world" instead: + +```bash +$ docker container run alice/alpine-python echo "Hello World" +``` + +::::::::::::::::::::::::: + +:::::::::::::::::::::::::::::::::::::::::::::::::: + +While it may not look like you have achieved much, you have already effected the combination of a lightweight Linux operating system with your specification to run a given command that can operate reliably on macOS, Microsoft Windows, Linux and on the cloud! + +## Boring but important notes about installation + +There are a lot of choices when it comes to installing software -- sometimes too many! +Here are some things to consider when creating your own container image: + +- **Start smart**, or, don't install everything from scratch! If you're using Python + as your main tool, start with a [Python container image](https://hub.docker.com/_/python). 
Same with [R](https://hub.docker.com/r/rocker/r-ver/). We've used Alpine Linux as an example + in this lesson, but it's generally not a good container image to start with for initial development and experimentation because it is + a less common distribution of Linux; [Ubuntu](https://hub.docker.com/_/ubuntu), [Debian](https://hub.docker.com/_/debian) and [CentOS](https://hub.docker.com/_/centos) are all + good options for scientific software installations. The program you're using might + recommend a particular distribution of Linux, and if so, it may be useful to start with a container image for that distribution. +- **How big?** How much software do you really need to install? When you have a choice, + lean towards using smaller starting container images and installing only what's needed for + your software, as a bigger container image means longer download times. +- **Know (or Google) your Linux**. Different distributions of Linux often have distinct sets of tools for installing software. The `apk` command we used above is the software package installer for Alpine Linux. The installers for various common Linux distributions are listed below: + - Ubuntu: `apt` or `apt-get` + - Debian: `apt` or `apt-get` + - CentOS: `yum` + Most common software installations are available to be installed via these tools. + A web search for "install X on Y Linux" is usually a good start for common software + installation tasks; if something isn't available via the Linux distribution's installation + tools, try the options below. +- **Use what you know**. You've probably used commands like `pip` or `install.packages()` + before on your own computer -- these will also work to install things in container images (if the basic scripting + language is installed). +- **README**. Many scientific software tools have a README or installation instructions + that lay out how to install software. You want to look for instructions for Linux. If + the install instructions include options like those suggested above, try those first. + +In general, a good strategy for installing software is: + +- Make a list of what you want to install. +- Look for pre-existing container images. +- Read through instructions for software you'll need to install. +- Try installing everything interactively in your base container -- take notes! +- From your interactive installation, create a `Dockerfile` and then try to build + the container image from that. + +## Share your new container image on Docker Hub + +Container images that you release publicly can be stored on the Docker Hub for free. If you +name your container image as described above, with your Docker Hub username, all you need to do +is run the opposite of `docker image pull` -- `docker image push`. + +```bash +$ docker image push alice/alpine-python +``` + +Make sure to substitute the full name of your container image! + +In a web browser, open [https://hub.docker.com](https://hub.docker.com), and on your user page you should now see your container image listed, for anyone to use or build on. + +::::::::::::::::::::::::::::::::::::::::: callout + +## Logging In + +Technically, you have to be logged into Docker on your computer for this to work. +Usually it happens by default, but if `docker image push` doesn't work for you, +run `docker login` first, enter your Docker Hub username and password, and then +try `docker image push` again. + + +:::::::::::::::::::::::::::::::::::::::::::::::::: + +## What's in a name? 
(again) + +You don't *have* to name your containers images using the `USERNAME/CONTAINER_IMAGE_NAME:TAG` naming scheme. On your own computer, you can call container images whatever you want, and refer to +them by the names you choose. It's only when you want to share a container image that it +needs the correct naming format. + +You can rename container images using the `docker image tag` command. For example, imagine someone +named Alice has been working on a workflow container image and called it `workflow-test` +on her own computer. She now wants to share it in her `alice` Docker Hub account +with the name `workflow-complete` and a tag of `v1`. Her `docker image tag` command +would look like this: + +```bash +$ docker image tag workflow-test alice/workflow-complete:v1 +``` + +She could then push the re-named container image to Docker Hub, +using `docker image push alice/workflow-complete:v1` + + + +:::::::::::::::::::::::::::::::::::::::: keypoints + +- `Dockerfile`s specify what is within Docker container images. +- The `docker image build` command is used to build a container image from a `Dockerfile`. +- You can share your Docker container images through the Docker Hub so that others can create Docker containers from your container images. + +:::::::::::::::::::::::::::::::::::::::::::::::::: + + diff --git a/_extras/.gitkeep b/episodes/data/.gitkeep similarity index 100% rename from _extras/.gitkeep rename to episodes/data/.gitkeep diff --git a/episodes/docker-hub.md b/episodes/docker-hub.md new file mode 100644 index 000000000..0bc26c162 --- /dev/null +++ b/episodes/docker-hub.md @@ -0,0 +1,195 @@ +--- +title: Finding Containers on Docker Hub +teaching: 10 +exercises: 10 +--- + +::::::::::::::::::::::::::::::::::::::: objectives + +- Explain how the Docker Hub augments Docker use. +- Explore the Docker Hub webpage for a popular Docker container image. +- Find the list of tags for a particular Docker container image. +- Identify the three components of a container image's identifier. + +:::::::::::::::::::::::::::::::::::::::::::::::::: + +:::::::::::::::::::::::::::::::::::::::: questions + +- What is the Docker Hub, and why is it useful? + +:::::::::::::::::::::::::::::::::::::::::::::::::: + +In the previous episode, we ran a few different containers derived from different +container images: `hello-world`, `alpine`, +and maybe `ubuntu`. Where did these container images come from? The Docker Hub! + +## Introducing the Docker Hub + +The Docker Hub is an online repository of container images, a vast number of which are publicly available. A large number of the container images are curated by the developers of the software that they package. Also, many commonly used pieces of software that have been containerized into images are officially endorsed, which means that you can trust the container images to have been checked for functionality, stability, and that they don't contain malware. + +::::::::::::::::::::::::::::::::::::::::: callout + +## Docker can be used without connecting to the Docker Hub + +Note that while the Docker Hub is well integrated into Docker functionality, the Docker Hub is certainly not required for all types of use of Docker containers. For example, some organizations may run container infrastructure that is entirely disconnected from the Internet. + + +:::::::::::::::::::::::::::::::::::::::::::::::::: + +## Exploring an Example Docker Hub Page + +As an example of a Docker Hub page, let's explore the page for the official Python language container images. 
The most basic form of containerized Python is in the `python` container image (which is endorsed by the Docker team). Open your web browser to [https://hub.docker.com/\_/python](https://hub.docker.com/_/python) to see what is on a typical Docker Hub software page. + +The top-left provides information about the name, short description, popularity (i.e., more than a billion downloads in the case of this container image), and endorsements. + +The top-right provides the command to pull this container image to your computer. + +The main body of the page contains many used headings, such as: + +- Which tags (i.e., container image versions) are supported; +- Summary information about where to get help, which computer architectures are supported, etc.; +- A longer description of the container image; +- Examples of how to use the container image; and +- The license that applies. + +The "How to use the image" section of most container images' pages will provide examples that are likely to cover your intended use of the container image. + +## Exploring Container Image Versions + +A single Docker Hub page can have many different versions of container images, +based on the version of the software inside. These +versions are indicated by "tags". When referring to the specific version of a container image +by its tag, you use a colon, `:`, like this: + +``` +CONTAINER_IMAGE_NAME:TAG +``` + +So if I wanted to download the `python` container image, with Python 3.8, I would use this name: + +```bash +$ docker image pull python:3.8 +``` + +But if I wanted to download a Python 3.6 container image, I would use this name: + +```bash +$ docker image pull python:3.6 +``` + +The default tag (which is used if you don't specify one) is called `latest`. + +So far, we've only seen container images that are maintained by the Docker team. However, +it's equally common to use container images that have been produced by individual owners +or organizations. Container images that you create and upload to Docker Hub would fall +into this category, as would the container images maintained by organizations like +[ContinuumIO](https://hub.docker.com/u/continuumio) (the folks who develop the Anaconda Python environment) or community +groups like [rocker](https://hub.docker.com/u/rocker), a group that builds community R container images. + +The name for these group- or individually-managed container images have this format: + +``` +OWNER/CONTAINER_IMAGE_NAME:TAG +``` + +::::::::::::::::::::::::::::::::::::::::: callout + +## Repositories + +The technical name for the contents of a Docker Hub page is a "repository." +The tag indicates the specific version of the container image that you'd like +to use from a particular repository. So a slightly more accurate version of +the above example is: + +``` +OWNER/REPOSITORY:TAG +``` + +:::::::::::::::::::::::::::::::::::::::::::::::::: + +::::::::::::::::::::::::::::::::::::::: challenge + +## What's in a name? + +How would I download the Docker container image produced by the `rocker` group that +has version 3.6.1 of R and the tidyverse installed? + +Note: the container image described in this exercise is large and won't be used +later in this lesson, so you don't actually need to pull the container image -- +constructing the correct `docker pull` command is sufficient. + +::::::::::::::: solution + +## Solution + +First, search for `rocker` in Docker Hub. Then look for their `tidyverse` container image. +You can look at the list of tags, or just guess that the tag is `3.6.1`. 
Altogether, +that means that the name of the container image we want to download is: + +```bash +$ docker image pull rocker/tidyverse:3.6.1 +``` + +::::::::::::::::::::::::: + +:::::::::::::::::::::::::::::::::::::::::::::::::: + +## Finding Container Images on Docker Hub + +There are many different container images on Docker Hub. This is where the real advantage +of using containers shows up -- each container image represents a complete software +installation that you can use and access without any extra work! + +The easiest way to find container images is to search on Docker Hub, but sometimes +software pages have a link to their container images from their home page. + +Note that anyone can create an account on Docker Hub and share container images there, +so it's important to exercise caution when choosing a container image on Docker Hub. These +are some indicators that a container image on Docker Hub is consistently maintained, +functional and secure: + +- The container image is updated regularly. +- The container image associated with a well established company, community, or other group that is well-known. +- There is a Dockerfile or other listing of what has been installed to the container image. +- The container image page has documentation on how to use the container image. + +If a container image is never updated, created by a random person, and does not have a lot +of metadata, it is probably worth skipping over. Even if such a container image is secure, it +is not reproducible and not a dependable way to run research computations. + +::::::::::::::::::::::::::::::::::::::: challenge + +## What container image is right for you? + +Find a Docker container image that's relevant to you. Take into account the suggestions +above of what to look for as you evaluate options. If you're unsuccessful in your search, +or don't know what to look for, you can use the R or Python container image we've +already seen. + +Once you find a container image, use the skills from the previous episode to download +the container image and explore it. + + +:::::::::::::::::::::::::::::::::::::::::::::::::: + + + +{% comment %} + + + +{% endcomment %} + +:::::::::::::::::::::::::::::::::::::::: keypoints + +- The Docker Hub is an online repository of container images. +- Many Docker Hub container images are public, and may be officially endorsed. +- Each Docker Hub page about a container image provides structured information and subheadings +- Most Docker Hub pages about container images contain sections that provide examples of how to use those container images. +- Many Docker Hub container images have multiple versions, indicated by tags. +- The naming convention for Docker container images is: `OWNER/CONTAINER_IMAGE_NAME:TAG` + +:::::::::::::::::::::::::::::::::::::::::::::::::: + + diff --git a/episodes/docker-image-examples.md b/episodes/docker-image-examples.md new file mode 100644 index 000000000..5fb493903 --- /dev/null +++ b/episodes/docker-image-examples.md @@ -0,0 +1,83 @@ +--- +title: Examples of Using Container Images in Practice +teaching: 20 +exercises: 0 +--- + +::::::::::::::::::::::::::::::::::::::: objectives + +- Use existing container images and Docker in a research project. + +:::::::::::::::::::::::::::::::::::::::::::::::::: + +:::::::::::::::::::::::::::::::::::::::: questions + +- How can I use Docker for my own work? 
+ +:::::::::::::::::::::::::::::::::::::::::::::::::: + +Now that we have learned the basics of working with Docker container images and containers, +let's apply what we learned to an example workflow. + +You may choose one or more of the following examples to practice using containers. + +## Jekyll Website Example + +In this [Jekyll Website example](../instructors/e02-jekyll-lesson-example.md), you can practice +rendering this lesson website on your computer using the Jekyll static website generator in a Docker container. +Rendering the website in a container avoids a complicated software installation; instead of installing Jekyll and all the other tools needed to create the final website, all the work can be done in the container. +Additionally, when you no longer need to render the website, you can easily and cleanly remove the software from your computer. + +## GitHub Actions Example + +In this [GitHub Actions example](../instructors/e01-github-actions.md), you can learn more about +continuous integration in the cloud and how you can use container images with GitHub to +automate repetitive tasks like testing code or deploying websites. + +{% comment %} + + + +{% endcomment %} + +## Using Containers on an HPC Cluster + +It is possible to run containers on shared computing systems run by a university or national +computing center. As a researcher, you can build container images and test containers on your own +computer and then run your full-scale computing work on a shared computing +system like a high performance cluster or high throughput grid. + +The catch? Most university and national computing centers do not support *running* +containers with Docker commands, and instead use a similar tool called Singularity or +Shifter. However, both of these programs can be used to run containers based on Docker container images, +so often people create their container image as a Docker container image, so they can +run it using either of Docker or Singularity. + +There isn't yet a working example of how to use Docker container images on a shared +computing system, partially because each system is slightly different, but the +following resources show what it can look like: + +- [Introduction to Singularity](https://carpentries-incubator.github.io/singularity-introduction/): See the episode titled "Running MPI parallel jobs using Singularity containers" +- [Container Workflows at Pawsey](https://pawseysc.github.io/container-workflows/): See the episode titled "Run containers on HPC with Shifter (and Singularity)" + +## Seeking Examples + +Do you have another example of using Docker in a workflow related to your field? Please [open a lesson issue] or [submit a pull request] to add it to this episode and the extras section of the lesson. + + + +[submit a pull request]: https://github.com/carpentries-incubator/docker-introduction/pulls + + +:::::::::::::::::::::::::::::::::::::::: keypoints + +- There are many ways you might use Docker and existing container images in your research project. 
+ +:::::::::::::::::::::::::::::::::::::::::::::::::: + + diff --git a/data/.gitkeep b/episodes/fig/.gitkeep similarity index 100% rename from data/.gitkeep rename to episodes/fig/.gitkeep diff --git a/fig/github-gh-pages-branch.png b/episodes/fig/github-gh-pages-branch.png similarity index 100% rename from fig/github-gh-pages-branch.png rename to episodes/fig/github-gh-pages-branch.png diff --git a/fig/github-io-pages.png b/episodes/fig/github-io-pages.png similarity index 100% rename from fig/github-io-pages.png rename to episodes/fig/github-io-pages.png diff --git a/fig/github-main-branch.png b/episodes/fig/github-main-branch.png similarity index 100% rename from fig/github-main-branch.png rename to episodes/fig/github-main-branch.png diff --git a/fig/.gitkeep b/episodes/files/.gitkeep similarity index 100% rename from fig/.gitkeep rename to episodes/files/.gitkeep diff --git a/files/docker-intro.zip b/episodes/files/docker-intro.zip similarity index 100% rename from files/docker-intro.zip rename to episodes/files/docker-intro.zip diff --git a/episodes/introduction.md b/episodes/introduction.md new file mode 100644 index 000000000..9a27b18d9 --- /dev/null +++ b/episodes/introduction.md @@ -0,0 +1,214 @@ +--- +title: Introducing Containers +teaching: 20 +exercises: 0 +--- + +::::::::::::::::::::::::::::::::::::::: objectives + +- Show how software depending on other software leads to configuration management problems. +- Identify the problems that software installation can pose for research. +- Explain the advantages of containerization. +- Explain how using containers can solve software configuration problems + +:::::::::::::::::::::::::::::::::::::::::::::::::: + +:::::::::::::::::::::::::::::::::::::::: questions + +- What are containers, and why might they be useful to me? + +:::::::::::::::::::::::::::::::::::::::::::::::::: + +::::::::::::::::::::::::::::::::::::::::: callout + +## Learning about Docker Containers + +The Australian Research Data Commons has produced a short introductory video +about Docker containers that covers many of the points below. Watch it before +or after you go through this section to reinforce your understanding! + +[How can software containers help your research?](https://www.youtube.com/watch?v=HelrQnm3v4g) + +Australian Research Data Commons, 2021. *How can software containers help your research?*. [video] Available at: [https://www.youtube.com/watch?v=HelrQnm3v4g](https://www.youtube.com/watch?v=HelrQnm3v4g) DOI: [http://doi.org/10.5281/zenodo.5091260](https://doi.org/10.5281/zenodo.5091260) + + +:::::::::::::::::::::::::::::::::::::::::::::::::: + +## Scientific Software Challenges + +::::::::::::::::::::::::::::::::::::::: challenge + +## What's Your Experience? + +Take a minute to think about challenges that you have experienced in using +scientific software (or software in general!) for your research. Then, +share with your neighbors and try to come up with a list of common gripes or +challenges. + + +:::::::::::::::::::::::::::::::::::::::::::::::::: + +You may have come up with some of the following: + +- you want to use software that doesn't exist for the operating system (Mac, Windows, Linux) you'd prefer. +- you struggle with installing a software tool because you have to install a number of other dependencies first. Those dependencies, in turn, require *other* things, and so on (i.e. combinatoric explosion). 
+- the software you're setting up involves many dependencies and only a subset of all possible versions of those dependencies actually works as desired. +- you're not actually sure what version of the software you're using because the install process was so circuitous. +- you and a colleague are using the same software but get different results because you have installed different versions and/or are using different operating systems. +- you installed everything correctly on your computer but now need to install it on a colleague's computer/campus computing cluster/etc. +- you've written a package for other people to use but a lot of your users frequently have trouble with installation. +- you need to reproduce a research project from a former colleague and the software used was on a system you no longer have access to. + +A lot of these characteristics boil down to one fact: the main program you want +to use likely depends on many, many, different other programs (including the +operating system!), creating a very complex, and often fragile system. One change +or missing piece may stop the whole thing from working or break something that was +already running. It's no surprise that this situation is sometimes +informally termed "dependency hell". + +::::::::::::::::::::::::::::::::::::::: challenge + +## Software and Science + +Again, take a minute to think about how the software challenges we've discussed +could impact (or have impacted!) the quality of your work. +Share your thoughts with your neighbors. What can go wrong if our software +doesn't work? + + +:::::::::::::::::::::::::::::::::::::::::::::::::: + +Unsurprisingly, software installation and configuration challenges can have +negative consequences for research: + +- you can't use a specific tool at all, because it's not available or installable. +- you can't reproduce your results because you're not sure what tools you're actually using. +- you can't access extra/newer resources because you're not able to replicate your software set up. +- others cannot validate and/or build upon your work because they cannot recreate your system's unique configuration. + +Thankfully there are ways to get underneath (a lot of) this mess: containers +to the rescue! Containers provide a way to package up software dependencies +and access to resources such as files and communications networks in a uniform manner. + +## What is a Container? What is Docker? + +[Docker][Docker] is a tool that allows you to build what are called "containers." It's +not the only tool that can create containers, but is the one we've chosen for +this workshop. But what *is* a container? + +To understand containers, let's first talk briefly about your computer. + +Your computer has some standard pieces that allow it to work -- often what's +called the hardware. One of these pieces is the CPU or processor; another is +the amount of memory or RAM that your computer can use to store information +temporarily while running programs; another is the hard drive, which can store +information over the long-term. All these pieces work together to do the +"computing" of a computer, but we don't see them because they're hidden from view (usually). + +Instead, what we see is our desktop, program windows, different folders, and +files. These all live in what's called the filesystem. Everything on your computer -- programs, +pictures, documents, the operating system itself -- lives somewhere in the filesystem. 
+ +NOW, imagine you want to install some new software but don't want to take the chance +of making a mess of your existing system by installing a bunch of additional stuff +(libraries/dependencies/etc.). +You don't want to buy a whole new computer because it's too expensive. +What if, instead, you could have another independent filesystem and running operating system that you could access from your main computer, and that is actually stored within this existing computer? + +Or, imagine you have two tools you want to use in your groundbreaking research on cat memes: `PurrLOLing`, a tool that does AMAZINGLY well at predicting the best text for a meme based on the cat species and `WhiskerSpot`, the only tool available for identifying cat species from images. You want to send cat pictures to `WhiskerSpot`, and then send the species output to `PurrLOLing`. But there's a problem: `PurrLOLing` only works on Ubuntu and `WhiskerSpot` is only supported for OpenSUSE so you can't have them on the same system! Again, we really want another filesystem (or two) on our computer that we could use to chain together `WhiskerSpot` and `PurrLOLing` in a "pipeline"... + +Container systems, like Docker, are special programs on your computer that make it possible! +The term "container" can be usefully considered with reference to shipping +containers. Before shipping containers were developed, packing and unpacking +cargo ships was time consuming and error prone, with high potential for +different clients' goods to become mixed up. Just like shipping containers keep things +together that should stay together, software containers standardize the description and +creation of a complete software system: you can drop a container into any computer with +the container software installed (the 'container host'), and it should "just work". + +::::::::::::::::::::::::::::::::::::::::: callout + +## Virtualization + +Containers are an example of what's called **virtualization** -- having a +second "virtual" computer running and accessible from a main or **host** +computer. Another example of virtualization are **virtual machines** or +VMs. A virtual machine typically contains a whole copy of an operating system in +addition to its own filesystem and has to get booted up in the same way +a computer would. +A container is considered a lightweight version of a virtual machine; +underneath, the container is (usually) using the Linux kernel and simply has some +flavour of Linux + the filesystem inside. + + +:::::::::::::::::::::::::::::::::::::::::::::::::: + +One final term: while the **container** is an alternative filesystem layer that you +can access and run from your computer, the **container image** is the 'recipe' or template +for a container. The container image has all the required information to start +up a running copy of the container. A running container tends to be transient +and can be started and shut down. The container image is more long-lived, as a definition for the container. +You could think of the container image like a cookie cutter -- it +can be used to create multiple copies of the same shape (or container) +and is relatively unchanging, where cookies come and go. If you want a +different type of container (cookie) you need a different container image (cookie cutter). + +## Putting the Pieces Together + +Think back to some of the challenges we described at the beginning. 
The many layers +of scientific software installations make it hard to install and re-install +scientific software -- which ultimately, hinders reliability and reproducibility. + +But now, think about what a container is -- a self-contained, complete, separate +computer filesystem. What advantages are there if you put your scientific software +tools into containers? + +This solves several of our problems: + +- documentation -- there is a clear record of what software and software dependencies were used, from bottom to top. +- portability -- the container can be used on any computer that has Docker installed -- it doesn't matter whether the computer is Mac, Windows or Linux-based. +- reproducibility -- you can use the exact same software and environment on your computer and on other resources (like a large-scale computing cluster). +- configurability -- containers can be sized to take advantage of more resources (memory, CPU, etc.) on large systems (clusters) or less, depending on the circumstances. + +The rest of this workshop will show you how to download and run containers from pre-existing +container images on your own computer, and how to create and share your own container images. + +## Use cases for containers + +Now that we have discussed a little bit about containers -- what they do and the +issues they attempt to address -- you may be able to think of a few potential use +cases in your area of work. Some examples of common use cases for containers in +a research context include: + +- Using containers solely on your own computer to use a specific software tool + or to test out a tool (possibly to avoid a difficult and complex installation + process, to save your time or to avoid dependency hell). +- Creating a `Dockerfile` that generates a container image with software that you + specify installed, then sharing a container image generated using this Dockerfile with + your collaborators for use on their computers or a remote computing resource + (e.g. cloud-based or HPC system). +- Archiving the container images so you can repeat analysis/modelling using the + same software and configuration in the future -- capturing your workflow. + + + +{% comment %} + + + +{% endcomment %} + +:::::::::::::::::::::::::::::::::::::::: keypoints + +- Almost all software depends on other software components to function, but these components have independent evolutionary paths. +- Small environments that contain only the software that is needed for a given task are easier to replicate and maintain. +- Critical systems that cannot be upgraded, due to cost, difficulty, etc. need to be reproduced on newer systems in a maintainable and self-documented way. +- Virtualization allows multiple environments to run on a single computer. +- Containerization improves upon the virtualization of whole computers by allowing efficient management of the host computer's memory and storage resources. +- Containers are built from 'recipes' that define the required set of software components and the instructions necessary to build/install them within a container image. +- Docker is just one software platform that can create containers and the resources they use. 
+ +:::::::::::::::::::::::::::::::::::::::::::::::::: + + diff --git a/episodes/managing-containers.md b/episodes/managing-containers.md new file mode 100644 index 000000000..004966bb7 --- /dev/null +++ b/episodes/managing-containers.md @@ -0,0 +1,184 @@ +--- +title: Cleaning Up Containers +teaching: 10 +exercises: 0 +--- + +::::::::::::::::::::::::::::::::::::::: objectives + +- Explain how to list running and completed containers. +- Know how to list and remove container images. + +:::::::::::::::::::::::::::::::::::::::::::::::::: + +:::::::::::::::::::::::::::::::::::::::: questions + +- How do I interact with a Docker container on my computer? +- How do I manage my containers and container images? + +:::::::::::::::::::::::::::::::::::::::::::::::::: + +## Removing images + +The container images and their corresponding containers can start to take up a lot of disk space if you don't clean them up occasionally, so it's a good idea to periodically remove containers and container images that you won't be using anymore. + +In order to remove a specific container image, you need to find out details about the container image, +specifically, the "Image ID". For example, say my laptop contained the following container image: + +```bash +$ docker image ls +``` + +```output +REPOSITORY TAG IMAGE ID CREATED SIZE +hello-world latest fce289e99eb9 15 months ago 1.84kB +``` + +You can remove the container image with a `docker image rm` command that includes the *Image ID*, such as: + +```bash +$ docker image rm fce289e99eb9 +``` + +or use the container image name, like so: + +```bash +$ docker image rm hello-world +``` + +However, you may see this output: + +```output +Error response from daemon: conflict: unable to remove repository reference "hello-world" (must force) - container e7d3b76b00f4 is using its referenced image fce289e99eb9 +``` + +This happens when Docker hasn't cleaned up some of the previously running containers +based on this container image. So, before removing the container image, we need to be able +to see what containers are currently running, or have been run recently, and how +to remove these. + +## What containers are running? + +Working with containers, we are going to shift back to the command: `docker container`. Similar to `docker image`, we can list running containers by typing: + +```bash +$ docker container ls +``` + +```output +CONTAINER ID IMAGE COMMAND CREATED STATUS PORTS NAMES +``` + +Notice that this command didn't return any containers because our containers all exited and thus stopped running after they completed their work. + +::::::::::::::::::::::::::::::::::::::::: callout + +## `docker ps` + +The command `docker ps` serves the same purpose as `docker container ls`, and comes +from the Unix shell command `ps` which describes running processes. + + +:::::::::::::::::::::::::::::::::::::::::::::::::: + +## What containers have run recently? + +There is also a way to list running containers, and those that have completed recently, which is to add the `--all`/`-a` flag to the `docker container ls` command as shown below. 
+ +```bash +$ docker container ls --all +``` + +```output +CONTAINER ID IMAGE COMMAND CREATED STATUS PORTS NAMES +9c698655416a hello-world "/hello" 2 minutes ago Exited (0) 2 minutes ago zen_dubinsky +6dd822cf6ca9 hello-world "/hello" 3 minutes ago Exited (0) 3 minutes ago eager_engelbart +``` + +::::::::::::::::::::::::::::::::::::::::: callout + +## Keeping it clean + +You might be surprised at the number of containers Docker is still keeping track of. +One way to prevent this from happening is to add the `--rm` flag to `docker container run`. This +will completely wipe out the record of the run container when it exits. If you need +a reference to the running container for any reason, **don't** use this flag. + + +:::::::::::::::::::::::::::::::::::::::::::::::::: + +## How do I remove an exited container? + +To delete an exited container you can run the following command, inserting the `CONTAINER ID` for the container you wish to remove. +It will repeat the `CONTAINER ID` back to you, if successful. + +```bash +$ docker container rm 9c698655416a +``` + +```output +9c698655416a +``` + +An alternative option for deleting exited containers is the `docker container prune` command. Note that this command doesn't accept a container ID as an +option because it deletes ALL exited containers! +**Be careful** with this command as deleting the container is **forever**. +**Once a container is deleted you can not get it back.** +If you have containers you may want to reconnect to, you should **not** use this command. +It will ask you if to confirm you want to remove these containers, see output below. +If successful it will print the full `CONTAINER ID` back to you for each container it has +removed. + +```bash +$ docker container prune +``` + +```output +WARNING! This will remove all stopped containers. +Are you sure you want to continue? [y/N] y +Deleted Containers: +9c698655416a848278d16bb1352b97e72b7ea85884bff8f106877afe0210acfc +6dd822cf6ca92f3040eaecbd26ad2af63595f30bb7e7a20eacf4554f6ccc9b2b +``` + +## Removing images, for real this time + +Now that we've removed any potentially running or stopped containers, we can try again to +delete the `hello-world` **container image**. + +```bash +$ docker image rm hello-world +``` + +```output +Untagged: hello-world:latest +Untagged: hello-world@sha256:5f179596a7335398b805f036f7e8561b6f0e32cd30a32f5e19d17a3cda6cc33d +Deleted: sha256:fce289e99eb9bca977dae136fbe2a82b6b7d4c372474c9235adc1741675f587e +Deleted: sha256:af0b15c8625bb1938f1d7b17081031f649fd14e6b233688eea3c5483994a66a3 +``` + +The reason that there are a few lines of output, is that a given container image may have been formed by merging multiple underlying layers. +Any layers that are used by multiple Docker container images will only be stored once. +Now the result of `docker image ls` should no longer include the `hello-world` container image. + + + +{% comment %} + + + + + +{% endcomment %} + +:::::::::::::::::::::::::::::::::::::::: keypoints + +- `docker container` has subcommands used to interact and manage containers. +- `docker image` has subcommands used to interact and manage container images. +- `docker container ls` or `docker ps` can provide information on currently running containers. 
+ +:::::::::::::::::::::::::::::::::::::::::::::::::: + + diff --git a/episodes/meet-docker.md b/episodes/meet-docker.md new file mode 100644 index 000000000..e93727877 --- /dev/null +++ b/episodes/meet-docker.md @@ -0,0 +1,364 @@ +--- +title: Introducing the Docker Command Line +teaching: 10 +exercises: 0 +--- + +::::::::::::::::::::::::::::::::::::::: objectives + +- Explain how to check that Docker is installed and is ready to use. +- Demonstrate some initial Docker command line interactions. +- Use the built-in help for Docker commands. + +:::::::::::::::::::::::::::::::::::::::::::::::::: + +:::::::::::::::::::::::::::::::::::::::: questions + +- How do I know Docker is installed and running? +- How do I interact with Docker? + +:::::::::::::::::::::::::::::::::::::::::::::::::: + +## Docker command line + +Start the Docker application that you installed in working through the setup instructions for this session. Note that this might not be necessary if your laptop is running Linux or if the installation added the Docker application to your startup process. + +::::::::::::::::::::::::::::::::::::::::: callout + +## You may need to login to Docker Hub + +The Docker application will usually provide a way for you to log in to the Docker Hub using the application's menu (macOS) or systray +icon (Windows) and it is usually convenient to do this when the application starts. This will require you to use your Docker Hub +username and your password. We will not actually require access to the Docker Hub until later in the course but if you can login now, +you should do so. + + +:::::::::::::::::::::::::::::::::::::::::::::::::: + +::::::::::::::::::::::::::::::::::::::::: callout + +## Determining your Docker Hub username + +If you no longer recall your Docker Hub username, e.g., because you have been logging into the Docker Hub using your email address, +you can find out what it is through the steps: + +- Open [https://hub.docker.com/](https://hub.docker.com/) in a web browser window +- Sign-in using your email and password (don't tell us what it is) +- In the top-right of the screen you will see your username + + +:::::::::::::::::::::::::::::::::::::::::::::::::: + +Once your Docker application is running, open a shell (terminal) window, and run the following command to check that Docker is installed and the command line tools are working correctly. Below is the output for a Mac version, but the specific version is unlikely to matter much: it does not have to precisely match the one listed below. + +```bash +$ docker --version +``` + +```output +Docker version 20.10.5, build 55c4c88 +``` + +The above command has not actually relied on the part of Docker that runs containers, just that Docker +is installed and you can access it correctly from the command line. + +A command that checks that Docker is working correctly is the `docker container ls` command (we cover this command in more detail later in the course). + +Without explaining the details, output on a newly installed system would likely be: + +```bash +$ docker container ls +``` + +```output +CONTAINER ID IMAGE COMMAND CREATED STATUS PORTS NAMES +``` + +(The command `docker system info` could also be used to verify that Docker is correctly installed and operational but it produces a larger amount of output.) + +However, if you instead get a message similar to the following + +```output +Cannot connect to the Docker daemon at unix:///var/run/docker.sock. Is the docker daemon running? 
+``` + +then you need to check that you have started the Docker Desktop, Docker Engine, or however else you worked through the setup instructions. + +## Getting help + +Often when working with a new command line tool, we need to get help. These tools often have some +sort of subcommand or flag (usually `help`, `-h`, or `--help`) that displays a prompt describing how to use the +tool. For Docker, it's no different. If we run `docker --help`, we see the following output (running `docker` also produces the help message): + +```output + +Usage: docker [OPTIONS] COMMAND + +A self-sufficient runtime for containers + +Options: + --config string Location of client config files (default "/Users/vini/.docker") + -c, --context string Name of the context to use to connect to the daemon (overrides DOCKER_HOST env var and default context set with "docker context use") + -D, --debug Enable debug mode + -H, --host list Daemon socket(s) to connect to + -l, --log-level string Set the logging level ("debug"|"info"|"warn"|"error"|"fatal") (default "info") + --tls Use TLS; implied by --tlsverify + --tlscacert string Trust certs signed only by this CA (default "/Users/vini/.docker/ca.pem") + --tlscert string Path to TLS certificate file (default "/Users/vini/.docker/cert.pem") + --tlskey string Path to TLS key file (default "/Users/vini/.docker/key.pem") + --tlsverify Use TLS and verify the remote + -v, --version Print version information and quit + +Management Commands: + app* Docker App (Docker Inc., v0.9.1-beta3) + builder Manage builds + buildx* Build with BuildKit (Docker Inc., v0.5.1-docker) + config Manage Docker configs + container Manage containers + context Manage contexts + image Manage images + manifest Manage Docker image manifests and manifest lists + network Manage networks + node Manage Swarm nodes + plugin Manage plugins + scan* Docker Scan (Docker Inc., v0.6.0) + secret Manage Docker secrets + service Manage services + stack Manage Docker stacks + swarm Manage Swarm + system Manage Docker + trust Manage trust on Docker images + volume Manage volumes + +Commands: + attach Attach local standard input, output, and error streams to a running container + build Build an image from a Dockerfile + commit Create a new image from a container's changes + cp Copy files/folders between a container and the local filesystem + create Create a new container + diff Inspect changes to files or directories on a container's filesystem + events Get real time events from the server + exec Run a command in a running container + export Export a container's filesystem as a tar archive + history Show the history of an image + images List images + import Import the contents from a tarball to create a filesystem image + info Display system-wide information + inspect Return low-level information on Docker objects + kill Kill one or more running containers + load Load an image from a tar archive or STDIN + login Log in to a Docker registry + logout Log out from a Docker registry + logs Fetch the logs of a container + pause Pause all processes within one or more containers + port List port mappings or a specific mapping for the container + ps List containers + pull Pull an image or a repository from a registry + push Push an image or a repository to a registry + rename Rename a container + restart Restart one or more containers + rm Remove one or more containers + rmi Remove one or more images + run Run a command in a new container + save Save one or more images to a tar archive (streamed to STDOUT by default) + search 
Search the Docker Hub for images + start Start one or more stopped containers + stats Display a live stream of container(s) resource usage statistics + stop Stop one or more running containers + tag Create a tag TARGET_IMAGE that refers to SOURCE_IMAGE + top Display the running processes of a container + unpause Unpause all processes within one or more containers + update Update configuration of one or more containers + version Show the Docker version information + wait Block until one or more containers stop, then print their exit codes + +Run 'docker COMMAND --help' for more information on a command. +``` + +There is a list of commands and the end of the help message says: `Run 'docker COMMAND --help' for more information on a command.` For example, take the `docker container ls` command that we ran previously. We can see from the Docker help prompt +that `container` is a Docker command, so to get help for that command, we run: + +```bash +docker container --help # or instead 'docker container' +``` + +```output + +Usage: docker container COMMAND + +Manage containers + +Commands: + attach Attach local standard input, output, and error streams to a running container + commit Create a new image from a container's changes + cp Copy files/folders between a container and the local filesystem + create Create a new container + diff Inspect changes to files or directories on a container's filesystem + exec Run a command in a running container + export Export a container's filesystem as a tar archive + inspect Display detailed information on one or more containers + kill Kill one or more running containers + logs Fetch the logs of a container + ls List containers + pause Pause all processes within one or more containers + port List port mappings or a specific mapping for the container + prune Remove all stopped containers + rename Rename a container + restart Restart one or more containers + rm Remove one or more containers + run Run a command in a new container + start Start one or more stopped containers + stats Display a live stream of container(s) resource usage statistics + stop Stop one or more running containers + top Display the running processes of a container + unpause Unpause all processes within one or more containers + update Update configuration of one or more containers + wait Block until one or more containers stop, then print their exit codes + +Run 'docker container COMMAND --help' for more information on a command. +``` + +There's also help for the `container ls` command: + +```bash +docker container ls --help # this one actually requires the '--help' flag +``` + +```output +Usage: docker container ls [OPTIONS] + +List containers + +Aliases: + ls, ps, list + +Options: + -a, --all Show all containers (default shows just running) + -f, --filter filter Filter output based on conditions provided + --format string Pretty-print containers using a Go template + -n, --last int Show n last created containers (includes all states) (default -1) + -l, --latest Show the latest created container (includes all states) + --no-trunc Don't truncate output + -q, --quiet Only display container IDs + -s, --size Display total file sizes +``` + +You may notice that there are many commands that stem from the `docker` command. Instead of trying to remember +all possible commands and options, it's better to learn how to effectively get help from the command line. Although +we can always search the web, getting the built-in help from our tool is often much faster and may provide the answer +right away. 
This applies not only to Docker, but also to most command line-based tools. + +::::::::::::::::::::::::::::::::::::::::: callout + +## Docker Command Line Interface (CLI) syntax + +In this lesson we use the newest Docker CLI syntax +[introduced with the Docker Engine version 1.13](https://www.docker.com/blog/whats-new-in-docker-1-13/). +This new syntax combines commands into groups you will most often +want to interact with. In the help example above you can see `image` and `container` +management commands, which can be used to interact with your images and +containers respectively. With this new syntax you issue commands using the following +pattern `docker [command] [subcommand] [additional options]` + +Comparing the output of two help commands above, you can +see that the same thing can be achieved in multiple ways. For example to start a +Docker container using the old syntax you would use `docker run`. To achieve the +same with the new syntax, you use `docker container run` instead. Even though the old +approach is shorter and still officially supported, the new syntax is more descriptive, less +error-prone and is therefore recommended. + + +:::::::::::::::::::::::::::::::::::::::::::::::::: + +::::::::::::::::::::::::::::::::::::::: challenge + +## Exploring a command + +Run `docker --help` and pick a command from the list. +Explore the help prompt for that command. Try to guess how a command would work by looking at the `Usage: ` +section of the prompt. + +::::::::::::::: solution + +## Solution + +Suppose we pick the `docker image build` command: + +```bash +docker image build --help +``` + +```output +Usage: docker image build [OPTIONS] PATH | URL | - + +Build an image from a Dockerfile + +Options: + --add-host list Add a custom host-to-IP mapping (host:ip) + --build-arg list Set build-time variables + --cache-from strings Images to consider as cache sources + --cgroup-parent string Optional parent cgroup for the container + --compress Compress the build context using gzip + --cpu-period int Limit the CPU CFS (Completely Fair Scheduler) period + --cpu-quota int Limit the CPU CFS (Completely Fair Scheduler) quota + -c, --cpu-shares int CPU shares (relative weight) + --cpuset-cpus string CPUs in which to allow execution (0-3, 0,1) + --cpuset-mems string MEMs in which to allow execution (0-3, 0,1) + --disable-content-trust Skip image verification (default true) + -f, --file string Name of the Dockerfile (Default is 'PATH/Dockerfile') + --force-rm Always remove intermediate containers + --iidfile string Write the image ID to the file + --isolation string Container isolation technology + --label list Set metadata for an image + -m, --memory bytes Memory limit + --memory-swap bytes Swap limit equal to memory plus swap: '-1' to enable unlimited swap + --network string Set the networking mode for the RUN instructions during build (default "default") + --no-cache Do not use cache when building the image + --pull Always attempt to pull a newer version of the image + -q, --quiet Suppress the build output and print image ID on success + --rm Remove intermediate containers after a successful build (default true) + --security-opt strings Security options + --shm-size bytes Size of /dev/shm + -t, --tag list Name and optionally a tag in the 'name:tag' format + --target string Set the target build stage to build. + --ulimit ulimit Ulimit options (default []) +``` + +We could try to guess that the command could be run like this: + +```bash +docker image build . 
+``` + +or + +```bash +docker image build https://github.com/docker/rootfs.git +``` + +Where `https://github.com/docker/rootfs.git` could be any relevant URL that supports a Docker image. + + + +::::::::::::::::::::::::: + +:::::::::::::::::::::::::::::::::::::::::::::::::: + + + +{% comment %} + + + +{% endcomment %} + +:::::::::::::::::::::::::::::::::::::::: keypoints + +- A toolbar icon indicates that Docker is ready to use (on Windows and macOS). +- You will typically interact with Docker using the command line. +- To learn how to run a certain Docker command, we can type the command followed by the `--help` flag. + +:::::::::::::::::::::::::::::::::::::::::::::::::: + + diff --git a/episodes/reproduciblity.md b/episodes/reproduciblity.md new file mode 100644 index 000000000..0426b204e --- /dev/null +++ b/episodes/reproduciblity.md @@ -0,0 +1,171 @@ +--- +title: 'Containers in Research Workflows: Reproducibility and Granularity' +teaching: 20 +exercises: 0 +--- + +::::::::::::::::::::::::::::::::::::::: objectives + +- Understand how container images can help make research more reproducible. +- Understand what practical steps I can take to improve the reproducibility of my research using containers. + +:::::::::::::::::::::::::::::::::::::::::::::::::: + +:::::::::::::::::::::::::::::::::::::::: questions + +- How can I use container images to make my research more reproducible? +- How do I incorporate containers into my research workflow? + +:::::::::::::::::::::::::::::::::::::::::::::::::: + +Although this workshop is titled "Reproducible computational environments using containers", +so far we have mostly covered the mechanics of using Docker with only passing reference to +the reproducibility aspects. In this section, we discuss these aspects in more detail. + +::::::::::::::::::::::::::::::::::::::::: callout + +## Work in progress... + +Note that reproducibility aspects of software and containers are an active area of research, discussion and development so are subject to many changes. We will present some ideas and approaches here but best practices will likely evolve in the near future. + + +:::::::::::::::::::::::::::::::::::::::::::::::::: + +## Reproducibility + +By *reproducibility* here we mean the ability of someone else (or your future self) being able to reproduce +what you did computationally at a particular time (be this in research, analysis or something else) +as closely as possible even if they do not have access to exactly the same hardware resources +that you had when you did the original work. + +Some examples of why containers are an attractive technology to help with reproducibility include: + +- The same computational work can be run across multiple different technologies seamlessly (e.g. Windows, macOS, Linux). +- You can save the exact process that you used for your computational work (rather than relying on potentially incomplete notes). +- You can save the exact versions of software and their dependencies in the container image. +- You can access legacy versions of software and underlying dependencies which may not be generally available any more. +- Depending on their size, you can also potentially store a copy of key data within the container image. +- You can archive and share the container image as well as associating a persistent identifier with a container image to allow other researchers to reproduce and build on your work. + +## Sharing images + +As we have already seen, the Docker Hub provides a platform for sharing container images publicly. 
Once you have uploaded a container image, you can point people to its public location and they can download and build upon it.
+
+This is fine for working collaboratively with container images on a day-to-day basis but the Docker Hub is not a good option for long-term archiving of container images in support of research and publications because:
+
+- free accounts have a limit on how long a container image will be hosted if it is not updated
+- it does not support adding persistent identifiers to container images
+- it is easy to overwrite tagged container images with newer versions by mistake.
+
+## Archiving and persistently identifying container images using Zenodo
+
+When you publish your work or make it publicly available in some way, it is good practice to make container images that you used for computational work available in an immutable, persistent way and to have an identifier that allows people to cite and give you credit for the work you have done. [Zenodo](https://zenodo.org/) is one service that provides this functionality.
+
+Zenodo supports the upload of *tar* archives and we can capture our Docker container images as tar archives using the `docker image save` command. For example, to export the container image we created earlier in this lesson:
+
+```bash
+docker image save alice/alpine-python:v1 -o alpine-python.tar
+```
+
+These tar archives can become quite large and Zenodo supports uploads of up to 50GB, so you may need to compress your archive with a tool such as gzip (or zip) to make it fit on Zenodo:
+
+```bash
+gzip alpine-python.tar
+```
+
+Once you have your archive, you can [deposit it on Zenodo](https://zenodo.org/deposit/) and this will:
+
+- Create a long-term archive snapshot of your Docker container image which people (including your future self) can download in order to reuse or reproduce your work.
+- Create a persistent DOI (*Digital Object Identifier*) that you can cite in any publications or outputs to enable reproducibility and recognition of your work.
+
+In addition to the archive file itself, the deposit process will ask you to provide some basic metadata to classify the container image and the associated work.
+
+Note that Zenodo is not the only option for archiving and generating persistent DOIs for container images. There are other services out there -- for example, some organizations may provide their own equivalent service.
+
+## Reproducibility good practice
+
+- Make use of container images to capture the computational environment required for your work.
+- Decide on the appropriate granularity for the container images you will use for your computational work -- this will be different for each project/area. Take note of accepted practice from contemporary work in the same area. What are the right building blocks for individual container images in your work?
+- Document what you have done and why -- this can be put in comments in the `Dockerfile` and the use of the container image described in associated documentation and/or publications. Make sure that references are made in both directions so that the container image and the documentation are appropriately linked.
+- When you publish work (in whatever way) use an archiving and DOI service such as Zenodo to make sure your container image is captured as it was used for the work and that it obtains a persistent DOI to allow it to be cited and referenced properly.
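+
+To see how such an archived container image gets used later: someone who downloads the
+archive (for example, from a Zenodo deposit like the one described above) can restore it
+into their local Docker installation with `docker image load`. This is a sketch only --
+the file and image names below assume the `alice/alpine-python:v1` example used earlier;
+substitute your own names as appropriate.
+
+```bash
+# Restore the archived container image (Docker can read the gzipped archive directly)
+docker image load -i alpine-python.tar.gz
+
+# Check that the container image is available again, then run it as usual
+docker image ls
+docker container run alice/alpine-python:v1
+```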
+ +## Container Granularity + +As mentioned above, one of the decisions you may need to make when containerising your research workflows +is what level of *granularity* you wish to employ. The two extremes of this decision could be characterized +as: + +- Create a single container image with all the tools you require for your research or analysis workflow +- Create many container images each running a single command (or step) of the workflow and use them together + +Of course, many real applications will sit somewhere between these two extremes. + +::::::::::::::::::::::::::::::::::::::: challenge + +## Positives and negatives + +What are the advantages and disadvantages of the two approaches to container granularity for research +workflows described above? Think about this +and write a few bullet points for advantages and disadvantages for each approach in the course Etherpad. + +::::::::::::::: solution + +## Solution + +This is not an exhaustive list but some of the advantages and disadvantages could be: + +### Single large container image + +- Advantages: + - Simpler to document + - Full set of requirements packaged in one place + - Potentially easier to maintain (though could be opposite if working with large, distributed group) +- Disadvantages: + - Could get very large in size, making it more difficult to distribute + - Could use [Docker multi-stage build](https://docs.docker.com/develop/develop-images/multistage-build) to reduce size + - May end up with same dependency issues within the container image from different software requirements + - Potentially more complex to test + - Less re-useable for different, but related, work + +### Multiple smaller container images + +- Advantages: + - Individual components can be re-used for different, but related, work + - Individual parts are smaller in size making them easier to distribute + - Avoid dependency issues between different pieces of software + - Easier to test +- Disadvantage: + - More difficult to document + - Potentially more difficult to maintain (though could be easier if working with large, distributed group) + - May end up with dependency issues between component container images if they get out of sync + + + +::::::::::::::::::::::::: + +:::::::::::::::::::::::::::::::::::::::::::::::::: + +::::::::::::::::::::::::::::::::::::::: challenge + +## Next steps with containers + +Now that we're at the end of the lesson material, take a moment to reflect on +what you've learned, how it applies to you, and what to do next. + +1. In your own notes, write down or diagram your understanding of Docker containers and container images: + concepts, commands, and how they work. +2. In the workshop's shared notes document, write down how you think you might + use containers in your daily work. If there's something you want to try doing with + containers right away, what is a next step after this workshop to make that happen? + +:::::::::::::::::::::::::::::::::::::::::::::::::: + +:::::::::::::::::::::::::::::::::::::::: keypoints + +- Container images allow us to encapsulate the computation (and data) we have used in our research. +- Using a service such as Docker Hub allows us to easily share computational work we have done. +- Using container images along with a DOI service such as Zenodo allows us to capture our work and enables reproducibility. 
+ +:::::::::::::::::::::::::::::::::::::::::::::::::: + + diff --git a/episodes/running-containers.md b/episodes/running-containers.md new file mode 100644 index 000000000..6d1971f2d --- /dev/null +++ b/episodes/running-containers.md @@ -0,0 +1,372 @@ +--- +title: Exploring and Running Containers +teaching: 20 +exercises: 10 +--- + +::::::::::::::::::::::::::::::::::::::: objectives + +- Use the correct command to see which Docker container images are on your computer. +- Be able to download new Docker container images. +- Demonstrate how to start an instance of a container from a container image. +- Describe at least two ways to execute commands inside a running Docker container. + +:::::::::::::::::::::::::::::::::::::::::::::::::: + +:::::::::::::::::::::::::::::::::::::::: questions + +- How do I interact with Docker containers and container images on my computer? + +:::::::::::::::::::::::::::::::::::::::::::::::::: + +::::::::::::::::::::::::::::::::::::::::: callout + +## Reminder of terminology: container images and containers + +Recall that a *container image* is the template from which particular instances of *containers* will be created. + + +:::::::::::::::::::::::::::::::::::::::::::::::::: + +Let's explore our first Docker container. The Docker team provides a simple container +image online called `hello-world`. We'll start with that one. + +## Downloading Docker images + +The `docker image` command is used to interact with Docker container images. +You can find out what container images you have on your computer by using the following command ("ls" is short for "list"): + +```bash +$ docker image ls +``` + +If you've just +installed Docker, you won't see any container images listed. + +To get a copy of the `hello-world` Docker container image from the internet, run this command: + +```bash +$ docker image pull hello-world +``` + +You should see output like this: + +```output +Using default tag: latest +latest: Pulling from library/hello-world +1b930d010525: Pull complete +Digest: sha256:f9dfddf63636d84ef479d645ab5885156ae030f611a56f3a7ac7f2fdd86d7e4e +Status: Downloaded newer image for hello-world:latest +docker.io/library/hello-world:latest +``` + +::::::::::::::::::::::::::::::::::::::::: callout + +## Docker Hub + +Where did the `hello-world` container image come from? It came from the Docker Hub +website, which is a place to share Docker container images with other people. More on that +in a later episode. + + +:::::::::::::::::::::::::::::::::::::::::::::::::: + +::::::::::::::::::::::::::::::::::::::: challenge + +## Exercise: Check on Your Images + +What command would you use to see if the `hello-world` Docker container image had downloaded +successfully and was on your computer? +Give it a try before checking the solution. + +::::::::::::::: solution + +## Solution + +To see if the `hello-world` container image is now on your computer, run: + +```bash +$ docker image ls +``` + +::::::::::::::::::::::::: + +:::::::::::::::::::::::::::::::::::::::::::::::::: + +Note that the downloaded `hello-world` container image is not in the folder where you are in the terminal! (Run +`ls` by itself to check.) The container image is not a file like our normal programs and documents; +Docker stores it in a specific location that isn't commonly accessed, so it's necessary +to use the special `docker image` command to see what Docker container images you have on your +computer. 
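+
+For example, a quick check (assuming the `hello-world` pull above succeeded) makes the
+distinction clear -- your working directory is unchanged, but Docker's own listing now
+includes the container image:
+
+```bash
+$ ls              # no hello-world files appear in your current directory
+$ docker image ls # but the container image shows up in Docker's listing
+```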
+ +## Running the `hello-world` container + +To create and run containers from named Docker container images you use the `docker container run` command. Try the following `docker container run` invocation. Note that it does not matter what your current working directory is. + +```bash +$ docker container run hello-world +``` + +```output +Hello from Docker! +This message shows that your installation appears to be working correctly. + +To generate this message, Docker took the following steps: + 1. The Docker client contacted the Docker daemon. + 2. The Docker daemon pulled the "hello-world" image from the Docker Hub. + (amd64) + 3. The Docker daemon created a new container from that image which runs the + executable that produces the output you are currently reading. + 4. The Docker daemon streamed that output to the Docker client, which sent it + to your terminal. + +To try something more ambitious, you can run an Ubuntu container with: + $ docker run -it ubuntu bash + +Share images, automate workflows, and more with a free Docker ID: + https://hub.docker.com/ + +For more examples and ideas, visit: + https://docs.docker.com/get-started/ +``` + +What just happened? When we use the `docker container run` command, Docker does three things: + +| 1\. Starts a Running Container | 2\. Performs Default Action | 3\. Shuts Down the Container | +| --------------------------------------------------------------------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | ------------------------------------------------------------------------------------------------------------------------------------------------- | +| Starts a running container, based on the container image. Think of this as the "alive" or "inflated" version of the container -- it's actually doing something. | If the container has a default action set, it will perform that default action. This could be as simple as printing a message (as above) or running a whole analysis pipeline! | Once the default action is complete, the container stops running (or exits). The container image is still there, but nothing is actively running. | + +The `hello-world` container is set up to run an action by default -- +namely to print this message. + +::::::::::::::::::::::::::::::::::::::::: callout + +## Using `docker container run` to get the image + +We could have skipped the `docker image pull` step; if you use the `docker container run` +command and you don't already have a copy of the Docker container image, Docker will +automatically pull the container image first and then run it. + + +:::::::::::::::::::::::::::::::::::::::::::::::::: + +## Running a container with a chosen command + +But what if we wanted to do something different with the container? The output +just gave us a suggestion of what to do -- let's use a different Docker container image +to explore what else we can do with the `docker container run` command. The suggestion above +is to use `ubuntu`, but we're going to run a different type of Linux, `alpine` +instead because it's quicker to download. + +::::::::::::::::::::::::::::::::::::::: challenge + +## Run the Alpine Docker container + +Try downloading the `alpine` container image and using it to run a container. You can do it in +two steps, or one. What are they? 
+ + +:::::::::::::::::::::::::::::::::::::::::::::::::: + +What happened when you ran the Alpine Docker container? + +```bash +$ docker container run alpine +``` + +If you have never used the `alpine` Docker container image on your computer, Docker probably printed a +message that it couldn't find the container image and had to download it. +If you used the `alpine` container image before, the command will probably show no output. That's +because this particular container is designed for you to provide commands yourself. Try running +this instead: + +```bash +$ docker container run alpine cat /etc/os-release +``` + +You should see the output of the `cat /etc/os-release` command, which prints out +the version of Alpine Linux that this container is using and a few additional bits of information. + +::::::::::::::::::::::::::::::::::::::: challenge + +## Hello World, Part 2 + +Can you run a copy of the `alpine` container and make it print a "hello world" message? + +Give it a try before checking the solution. + +::::::::::::::: solution + +## Solution + +Use the same command as above, but with the `echo` command to print a message. + +```bash +$ docker container run alpine echo 'Hello World' +``` + +::::::::::::::::::::::::: + +:::::::::::::::::::::::::::::::::::::::::::::::::: + +So here, we see another option -- we can provide commands at the end of the `docker container run` +command and they will execute inside the running container. + +## Running containers interactively + +In all the examples above, Docker has started the container, run a command, and then +immediately stopped the container. But what if we wanted to keep the container +running so we could log into it and test drive more commands? The way to +do this is by adding the interactive flags `-i` and `-t` (usually combined as `-it`) +to the `docker container run` command and provide a shell (`bash`,`sh`, etc.) +as our command. The `alpine` Docker container image doesn't include `bash` so we need +to use `sh`. + +```bash +$ docker container run -it alpine sh +``` + +::::::::::::::::::::::::::::::::::::::::: callout + +## Technically... + +Technically, the interactive flag is just `-i` -- the extra `-t` (combined +as `-it` above) is the "pseudo-TTY" option, a fancy term that means a text interface. +This allows you to connect to a shell, like `sh`, using a command line. Since you usually +want to have a command line when running interactively, it makes sense to use the two together. + + +:::::::::::::::::::::::::::::::::::::::::::::::::: + +Your prompt should change significantly to look like this: + +```bash +/ # +``` + +That's because you're now inside the running container! Try these commands: + +- `pwd` +- `ls` +- `whoami` +- `echo $PATH` +- `cat /etc/os-release` + +All of these are being run from inside the running container, so you'll get information +about the container itself, instead of your computer. To finish using the container, +type `exit`. + +```bash +/ # exit +``` + +::::::::::::::::::::::::::::::::::::::: challenge + +## Practice Makes Perfect + +Can you find out the version of Ubuntu installed on the `ubuntu` container image? +(Hint: You can use the same command as used to find the version of alpine.) + +Can you also find the `apt-get` program? What does it do? (Hint: try passing `--help` +to almost any command will give you more information.) 
+ +::::::::::::::: solution + +## Solution 1 -- Interactive + +Run an interactive ubuntu container -- you can use `docker image pull` first, or just +run it with this command: + +```bash +$ docker container run -it ubuntu sh +``` + +OR you can get the bash shell instead + +```bash +$ docker container run -it ubuntu bash +``` + +Then try, running these commands + +```bash +/# cat /etc/os-release +/# apt-get --help +``` + +Exit when you're done. + +```bash +/# exit +``` + +::::::::::::::::::::::::: + +::::::::::::::: solution + +## Solution 2 -- Run commands + +Run a ubuntu container, first with a command to read out the Linux version: + +```bash +$ docker container run ubuntu cat /etc/os-release +``` + +Then run a container with a command to print out the apt-get help: + +```bash +$ docker container run ubuntu apt-get --help +``` + +::::::::::::::::::::::::: + +:::::::::::::::::::::::::::::::::::::::::::::::::: + +::::::::::::::::::::::::::::::::::::::::: callout + +## Even More Options + +There are many more options, besides `-it` that can be used with the `docker container run` +command! A few of them will be covered in [later episodes](/advanced-containers) +and we'll share two more common ones here: + +- `--rm`: this option guarantees that any running container is completely + removed from your computer after the container is stopped. Without this option, + Docker actually keeps the "stopped" container around, which you'll see in a later + episode. Note that this option doesn't impact the *container images* that you've pulled, + just running instances of containers. + +- `--name=`: By default, Docker assigns a random name and ID number to each container + instance that you run on your computer. If you want to be able to more easily refer + to a specific running container, you can assign it a name using this option. + +:::::::::::::::::::::::::::::::::::::::::::::::::: + +## Conclusion + +So far, we've seen how to download Docker container images, use them to run commands inside +running containers, and even how to explore a running container from the inside. +Next, we'll take a closer look at all the different kinds of Docker container images that are out there. + + + +{% comment %} + + + + + +{% endcomment %} + +:::::::::::::::::::::::::::::::::::::::: keypoints + +- The `docker image pull` command downloads Docker container images from the internet. +- The `docker image ls` command lists Docker container images that are (now) on your computer. +- The `docker container run` command creates running containers from container images and can run commands inside them. +- When using the `docker container run` command, a container can run a default action (if it has one), a user specified action, or a shell to be used interactively. + +:::::::::::::::::::::::::::::::::::::::::::::::::: + + diff --git a/files/.gitkeep b/files/.gitkeep deleted file mode 100644 index e69de29bb..000000000 diff --git a/index.md b/index.md index e97a14ca2..16c6e0a8e 100644 --- a/index.md +++ b/index.md @@ -1,56 +1,80 @@ --- -layout: lesson -root: . # Is the only page that doesn't follow the pattern /:path/index.html -permalink: index.html # Is the only page that doesn't follow the pattern /:path/index.html +permalink: index.html +site: sandpaper::sandpaper_site --- + +> **ATTENTION** This is an experimental test of [The Carpentries Workbench](https://carpentries.github.io/workbench) lesson infrastructure. 
+> It was automatically converted from the source lesson via [the lesson transition script](https://github.com/carpentries/lesson-transition/). +> +> If anything seems off, please contact Zhian Kamvar [zkamvar@carpentries.org](mailto:zkamvar@carpentries.org) + This session aims to introduce the use of Docker containers with the goal of using them to effect reproducible computational environments. Such environments are useful for ensuring reproducible research outputs, for example. -> ## After completing this session you should: -> - Have an understanding of what Docker containers are, why they are useful -> and the common terminology used -> - Have a working Docker installation on your local system to allow you to -> use containers -> - Understand how to use existing Docker containers for common tasks -> - Be able to build your own Docker containers by understanding both the role -> of a `Dockerfile` in building containers, and the syntax used in `Dockerfile`s -> - Understand how to manage Docker containers on your local system -> - Appreciate issues around reproducibility in software, understand how -> containers can address some of these issues and what the limits to -> reproducibility using containers are -{: .objectives} +:::::::::::::::::::::::::::::::::::::: objectives + +## After completing this session you should: + +- Have an understanding of what Docker containers are, why they are useful + and the common terminology used +- Have a working Docker installation on your local system to allow you to + use containers +- Understand how to use existing Docker containers for common tasks +- Be able to build your own Docker containers by understanding both the role + of a `Dockerfile` in building containers, and the syntax used in `Dockerfile`s +- Understand how to manage Docker containers on your local system +- Appreciate issues around reproducibility in software, understand how + containers can address some of these issues and what the limits to + reproducibility using containers are + + +:::::::::::::::::::::::::::::::::::::::::::::::::: The practical work in this lesson is primarily aimed at using Docker on your own laptop. Beyond your laptop, software container technologies such as Docker can also be used in the cloud and on high performance computing (HPC) systems. Some of the material in this lesson will be applicable to those environments too. -> ## Containers on HPC systems -> On HPC systems it is more likely that *Singularity* rather than Docker will be the available container technology. -> If you are looking for a lesson on using Singularity containers (instead of Docker), see this lesson: -> * [Reproducible Computational Environments Using Containers: Introduction to Singularity](https://carpentries-incubator.github.io/singularity-introduction/) -{: .callout} - -> ## Prerequisites -> -> - You should have basic familiarity with using a command shell, and the lesson text will at times request that you "open a shell window", with an assumption that you know what this means. -> - Under Linux or macOS it is assumed that you will access a `bash` shell (usually the default), using your Terminal application. -> - Under Windows, Powershell and Git Bash should allow you to use the Unix instructions. We will also try to give command variants for Windows `cmd.exe`. -> - The lessons will sometimes request that you use a text editor to create or edit files in particular directories. 
It is assumed that you either have an editor that you know how to use that runs within the working directory of your shell window (e.g. `nano`), or that if you use a graphical editor, that you can use it to read and write files into the working directory of your shell. -{: .prereq} - -> ## A note about Docker -> -> Docker is a mature, robust and very widely used application. Nonetheless, -> it is still under extensive development. New versions are released regularly -> often containing a range of updates and new features. -> -> While we do our best to ensure that this lesson remains up to date and the -> descriptions and outputs shown match what you will see on your own computer, -> inconsistencies can occur. -> -> If you spot inconsistencies or encounter any problems, please do report them -> by [opening an issue][open a lesson issue] in the [GitHub repository][docker-introduction repository] -> for this lesson. -{: .callout} +::::::::::::::::::::::::::::::::::::::::: callout + +## Containers on HPC systems + +On HPC systems it is more likely that *Singularity* rather than Docker will be the available container technology. +If you are looking for a lesson on using Singularity containers (instead of Docker), see this lesson: + +- [Reproducible Computational Environments Using Containers: Introduction to Singularity](https://carpentries-incubator.github.io/singularity-introduction/) + + +:::::::::::::::::::::::::::::::::::::::::::::::::: + +:::::::::::::::::::::::::::::::::::::::::: prereq + +## Prerequisites + +- You should have basic familiarity with using a command shell, and the lesson text will at times request that you "open a shell window", with an assumption that you know what this means. + - Under Linux or macOS it is assumed that you will access a `bash` shell (usually the default), using your Terminal application. + - Under Windows, Powershell and Git Bash should allow you to use the Unix instructions. We will also try to give command variants for Windows `cmd.exe`. +- The lessons will sometimes request that you use a text editor to create or edit files in particular directories. It is assumed that you either have an editor that you know how to use that runs within the working directory of your shell window (e.g. `nano`), or that if you use a graphical editor, that you can use it to read and write files into the working directory of your shell. + + +:::::::::::::::::::::::::::::::::::::::::::::::::: + +::::::::::::::::::::::::::::::::::::::::: callout + +## A note about Docker + +Docker is a mature, robust and very widely used application. Nonetheless, +it is still under extensive development. New versions are released regularly +often containing a range of updates and new features. + +While we do our best to ensure that this lesson remains up to date and the +descriptions and outputs shown match what you will see on your own computer, +inconsistencies can occur. + +If you spot inconsistencies or encounter any problems, please do report them +by [opening an issue][open a lesson issue] in the [GitHub repository][docker-introduction repository] +for this lesson. 
+ + +:::::::::::::::::::::::::::::::::::::::::::::::::: + -{% include links.md %} {% comment %} @@ -58,4 +82,7 @@ TODO: systematically check for Windows-isms + {% endcomment %} + + diff --git a/_extras/06-containers-on-the-cloud.md b/instructors/06-containers-on-the-cloud.md similarity index 78% rename from _extras/06-containers-on-the-cloud.md rename to instructors/06-containers-on-the-cloud.md index ef1abca53..7aede9da0 100644 --- a/_extras/06-containers-on-the-cloud.md +++ b/instructors/06-containers-on-the-cloud.md @@ -1,15 +1,16 @@ --- -title: "Creating Containers on the Cloud" +title: Creating Containers on the Cloud teaching: 20 exercises: 0 -questions: -- "How can I create Docker containers in the cloud?" +questions: How can I create Docker containers in the cloud? objectives: -- "Demonstrate how to effect creation of a container from the Docker image in the cloud." -- "Gain an initial experience of the container functionality provided by the Bitbucket repository storage service." +- Demonstrate how to effect creation of a container from the Docker image in the cloud. +- Gain an initial experience of the container functionality provided by the Bitbucket + repository storage service. keypoints: -- "You can create Docker containers on cloud computing resources just using a web browser." -- "Bitbucket is an online repository storage service that can create Docker containers to perform computation in response to files changing in your repositories." +- You can create Docker containers on cloud computing resources just using a web browser. +- Bitbucket is an online repository storage service that can create Docker containers + to perform computation in response to files changing in your repositories. --- ## Containers can be created on cloud computing platforms @@ -20,23 +21,28 @@ Note also that most cloud providers will give you sign-up credit that you can us For this lesson, though, we instead use an excellent software project repository platform, Bitbucket, that allows users a monthly quota of minutes for which containers of your choice can be run. Bitbucket allows containers to be created in response to the modification of files within your software project. -> ## There are many excellent sites for storing public software repositories -> - Note that Bitbucket, GitHub and GitLab all achieve similar functions. -> - Bitbucket offers container-based features that are easier to get at than the equivalent functions in GitHub, although GitHub will soon catch up when they release their GitHub Actions functionality publicly. -{: .callout} +::::::::::::::::::::::::::::::::::::::::: callout + +## There are many excellent sites for storing public software repositories + +- Note that Bitbucket, GitHub and GitLab all achieve similar functions. +- Bitbucket offers container-based features that are easier to get at than the equivalent functions in GitHub, although GitHub will soon catch up when they release their GitHub Actions functionality publicly. + + +:::::::::::::::::::::::::::::::::::::::::::::::::: ### Running a container in the cloud, using your Bitbucket account Because the ability to use the `git` version management tool is not a prerequisite of this session, we will use Bitbucket in an atypical manner. However we should still be able to clearly see Bitbucket's cloud servers running a container of your choice, under your control. -- Open a web browser window and visit . +- Open a web browser window and visit [https://bitbucket.org/](https://bitbucket.org/). 
- Log into your Bitbucket account. - Click the "create" (or just "+", if the menu is not expanded) button near the top-left of the page. - In the Create menu that appears, choose "Repository". - You will need to fill in the "Create new repository" form: - - You need to choose a Repository name: I am going to choose "use-my-container", and you are welcome to do the same. - - I unchecked the "This is a private repository" button, to make my repository public - - All of the other defaults should be OK, including the advanced settings + - You need to choose a Repository name: I am going to choose "use-my-container", and you are welcome to do the same. + - I unchecked the "This is a private repository" button, to make my repository public + - All of the other defaults should be OK, including the advanced settings - Activate the "Create repository" button - A page with heading "Let's put some bits in your bucket" appears, since the aim of Bitbucket is to host repositories of code and data... but we will ignore this workflow, and instead, you should click "Pipelines" in the second menu in from the left. - A page that promotes Bitbucket Pipelines should appear. This page also notes that you have 500 free minutes per month (for a free account): this is minutes of time that your containers are allowed to run, on Bitbucket's cloud servers. You just need to click the "Start using Pipelines" link in the bottom-centre. @@ -50,7 +56,8 @@ You should be looking at a web-based text editor that is headed "bitbucket-pipel Bitbucket Pipelines allow you to specify software tools to run, for example, in response to files being changed in your Bitbucket projects. The Bitbucket servers run your software tools within Docker containers, and thus Bitbucket Pipelines can specify Docker images to fetch from the Docker Hub. Change your repository's `bitbucket-pipelines.yml` file to be similar to the following example, but note that you need to replace the Docker Hub user ID (alice in example) with yours. Also, ensure that your indentation steps in line-by-line, the language being used (YAML) gives significance to the indentation of the lines. -~~~ + +``` image: alice/my-container pipelines: @@ -58,13 +65,14 @@ pipelines: - step: script: - /bin/cat /root/my_message -~~~ +``` Click the "commit file" button. After you commit your `bitbucket-pipelines.yml` file, the Bitbucket Pipeline will download the Docker image you specified from the Docker Hub, and display the progress of the computations it runs. When using the `docker run` command (as you have done previously), the container takes some default actions after being created, which are specified in your Dockerfile (e.g., the `CMD` line). Bitbucket Pipelines disable these default actions, instead using the commands listed under the "script:" section in your `bitbucket-pipelines.yml`. Note that hyphens at the same indentation level are treated as an itemised list. There is only one item in our `script:` list, namely the command `/bin/cat /root/my_message`. If the pipeline runs successfully, a green heading containing a tick icon will be shown near the top of the page. 
On the right-hand-side of the page, you should see the following headings: + - Build step - `/bin/cat /root/my_message` - Build teardown @@ -75,16 +83,22 @@ While it is difficult to argue that this container achieves important computatio {% comment %} Going further section + ## Digital Ocean hosting Cloud providers such as Digital Ocean {% endcomment %} -{% include links.md %} + {% comment %} + + {% endcomment %} + + + diff --git a/_extras/08-orchestration.md b/instructors/08-orchestration.md similarity index 62% rename from _extras/08-orchestration.md rename to instructors/08-orchestration.md index dd004d4af..3a7211236 100644 --- a/_extras/08-orchestration.md +++ b/instructors/08-orchestration.md @@ -1,13 +1,11 @@ --- -title: "Container Orchestration" +title: Container Orchestration teaching: 10 exercises: 0 -questions: -- "How can I deploy multiple containers?" -objectives: -- "Become aware of container orchestration services." -keypoints: -- "Docker Compose, Kubernetes, and Docker Swarm are tools that can deploy multiple containers." +questions: How can I deploy multiple containers? +objectives: Become aware of container orchestration services. +keypoints: Docker Compose, Kubernetes, and Docker Swarm are tools that can deploy + multiple containers. --- ## Container Orchestration @@ -17,16 +15,21 @@ container orchestration tools that you may find useful when managing workflows t We won't go in depth on using these tools in this lesson but instead briefly describe a few options and point to useful resources on using these tools to allow you to explore them yourself. - - Docker Compose - - Kubernetes - - Docker Swarm +- Docker Compose +- Kubernetes +- Docker Swarm -> ## The Wild West -> Use of container orchestration tools for research workflows is a relatively new concept and so there -> is not a huge amount of documentation and experience out there at the moment. You may need to search -> around for useful information or, better still, contact your -> [friendly neighbourhood RSE](https://society-rse.org/community/rse-groups/) to discuss what you want to do. -{: .callout} +::::::::::::::::::::::::::::::::::::::::: callout + +## The Wild West + +Use of container orchestration tools for research workflows is a relatively new concept and so there +is not a huge amount of documentation and experience out there at the moment. You may need to search +around for useful information or, better still, contact your +[friendly neighbourhood RSE](https://society-rse.org/community/rse-groups/) to discuss what you want to do. + + +:::::::::::::::::::::::::::::::::::::::::::::::::: **Docker Compose** provides a way of constructing a unified workflow (or service) made up of multiple individual Docker containers. In addition to the individual Dockerfiles for each container, you provide @@ -34,8 +37,8 @@ a higher-level configuration file which describes the different containers and h along with shared storage definitions between the containers. Once this high-level configuration has been defined, you can use single commands to start and stop the orchestrated set of containers. 
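To make the idea concrete, here is a minimal sketch of what such a Compose configuration might look like; the service names, the `alice/my-web-app` image and the volume name are invented for illustration and are not part of this lesson:

```yaml
# Hypothetical docker-compose.yml describing two cooperating containers
# and a shared named volume; the images and names are examples only.
services:
  web:
    image: alice/my-web-app      # an application image you have pushed to the Docker Hub
    ports:
      - "8080:80"                # expose the web service on the host
  database:
    image: postgres:15           # an off-the-shelf database image
    volumes:
      - db-data:/var/lib/postgresql/data
volumes:
  db-data:                       # named volume so the database data persists between runs
```

With a file along these lines in place, `docker compose up` starts the whole set of containers and `docker compose down` stops and removes them again.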
- - [Using Docker Compose for the Simple Deployment of an Integrated Drug Target Screening Platform](https://www.degruyter.com/view/journals/jib/14/2/article-20170016.xml) - - [Docker Compose Overview](https://docs.docker.com/compose/) +- [Using Docker Compose for the Simple Deployment of an Integrated Drug Target Screening Platform](https://www.degruyter.com/view/journals/jib/14/2/article-20170016.xml) +- [Docker Compose Overview](https://docs.docker.com/compose/) **Kubernetes** is an open source framework that provides similar functionality to Docker Compose. Its particular strengths are that is platform independent and can be used with many different container @@ -43,7 +46,7 @@ technologies and that it is widely available on cloud platforms so once you have in Kubernetes it can be deployed in different locations as required. It has become the de facto standard for container orchestration. - - [What is Kubernetes](https://kubernetes.io/docs/concepts/overview/what-is-kubernetes/) +- [What is Kubernetes](https://kubernetes.io/docs/concepts/overview/what-is-kubernetes/) **Docker Swarm** provides a way to scale out to multiple copies of similar containers. This potentially allows you to parallelise and scale out your research workflow so that you can run multiple copies and @@ -51,4 +54,6 @@ increase throughput. This would allow you, for example, to take advantage of mul system or run your workflow in the cloud to access more resources. Docker Swarm uses the concept of a manager container and worker containers to implement this distribution. - - [Docker Swarm Overview](https://docs.docker.com/engine/swarm/) +- [Docker Swarm Overview](https://docs.docker.com/engine/swarm/) + + diff --git a/_extras/about.md b/instructors/about.md similarity index 69% rename from _extras/about.md rename to instructors/about.md index 5f07f659d..ad7d08c46 100644 --- a/_extras/about.md +++ b/instructors/about.md @@ -1,5 +1,8 @@ --- title: About --- + {% include carpentries.html %} -{% include links.md %} + + + diff --git a/_extras/e01-github-actions.md b/instructors/e01-github-actions.md similarity index 85% rename from _extras/e01-github-actions.md rename to instructors/e01-github-actions.md index 7f2942262..b2624e14b 100644 --- a/_extras/e01-github-actions.md +++ b/instructors/e01-github-actions.md @@ -1,20 +1,21 @@ --- -title: "Using Docker with Github Actions" -layout: episode +title: Using Docker with Github Actions teaching: 30 exercises: 0 -questions: -- "How do I use Docker from Github Actions?" +questions: How do I use Docker from Github Actions? objectives: -- "Generate Github.io pages using Pandoc" -- "Raise awareness of how Docker can be used in cloud services" -keypoints: -- "You can call any Docker image from a Github action" +- Generate Github.io pages using Pandoc +- Raise awareness of how Docker can be used in cloud services +keypoints: You can call any Docker image from a Github action --- -> This lesson can be taught as a replacement of the episode "Containers on the Cloud". -> Participants should have experience working with `git` and Github. -{: .callout} +::::::::::::::::::::::::::::::::::::::::: callout + +This lesson can be taught as a replacement of the episode "Containers on the Cloud". +Participants should have experience working with `git` and Github. + + +:::::::::::::::::::::::::::::::::::::::::::::::::: Docker has become an industry standard in providing run-time environments to cloud services. This lesson shows how you can use Docker images inside Github Actions. 
Our specific @@ -22,6 +23,7 @@ example will show a neat way to build a simple website that goes with any project you might have going. # Github Actions + Github Actions are a means of automating repetitive task in maintaining software projects: - Testing if your software works correctly (Continuous Integration) @@ -46,6 +48,7 @@ demonstrate the use of a Docker container in deploying a small website presentin project. # Building Github.io pages with Pandoc + Suppose you have a Github project with a README and would like to turn it into HTML for a Github.io page. A common problem in documenting and testing software is to keep relevant content in a single location. In a Github project this location is the README, however it will look a lot more @@ -59,48 +62,54 @@ A fabulous tool for building web content from Markdown files is Pandoc. You coul army knife of document conversion: it is very, very versatile. In this instance we will only use its most basic operation. (If you are familiar with RMarkdown: Pandoc is what powers RMarkdown). -> ## Why Pandoc? -> There are other engines that can do this for you, but here are some features that win some people +::::::::::::::::::::::::::::::::::::::::: callout + +## Why Pandoc? + +There are other engines that can do this for you, but here are some features that win some people over: -> - Supports citations (from BibTeX or CSL database) -> - Rendered equations (using MathJax, optionally numbered) -> - Code highlighting -> - Highly customizable -{: .callout} + +- Supports citations (from BibTeX or CSL database) +- Rendered equations (using MathJax, optionally numbered) +- Code highlighting +- Highly customizable + + +:::::::::::::::::::::::::::::::::::::::::::::::::: We take you through the process of creating a project on Github from scratch and convert the README to HTML and upload it to a separate `gh-pages` branch. First let's take a look at what the end product will look like. We have a project ([example here](https://github.com/jhidding/readme-pages)) with a `main` branch that includes a README. -![A GitHub project with a README](../fig/github-main-branch.png){: width="90%"} +![](fig/github-main-branch.png){alt='A GitHub project with a README' width="90%"} We can use Pandoc to turn this README into a simple static website. -![Rendered GitHub Pages](../fig/github-io-pages.png){: width="90%"} +![](fig/github-io-pages.png){alt='Rendered GitHub Pages' width="90%"} If we switch to `gh-pages` branch in Github we can see where this page is hosted. -![`gh-pages` branch of the project](../fig/github-gh-pages-branch.png){: width="90%"} +![](fig/github-gh-pages-branch.png){alt='gh-pages branch of the project' width="90%"} Only a `index.html` and `.nojekyll` (that prevents Github from creating a Jekyll page). So how do we set this up? ## Create a Github Project + Create a github project with a short `README.md`. To do this: - go to `github.com` and make sure you're logged in - click the green "New" button at the top right - clone the new project to your computer. 
The instructions for -doing so will be shown in the dialog on Github, or you can also see [Software Carpentry lesson on Version -Control with Git](http://swcarpentry.github.io/git-novice/07-github/index.html), or -the example below: + doing so will be shown in the dialog on Github, or you can also see [Software Carpentry lesson on Version + Control with Git](https://swcarpentry.github.io/git-novice/07-github/index.html), or + the example below: -~~~ +```source git clone cd -~~~ -{: .source} +``` ## Using Pandoc to Create a Website @@ -113,11 +122,11 @@ it to generate static websites from Markdown. First, let's download a container with pandoc installed and run it to see what the pandoc version is. -~~~ +```source docker container run pandoc/core --version -~~~ -{: .source} -~~~ +``` + +```output Unable to find image 'pandoc/core:latest' locally latest: Pulling from pandoc/core f84cab65f19f: Pull complete @@ -133,21 +142,19 @@ User data directory: /root/.local/share/pandoc Copyright (C) 2006-2021 John MacFarlane. Web: https://pandoc.org This is free software; see the source for copying conditions. There is no warranty, not even for merchantability or fitness for a particular purpose. -~~~ -{: .output} +``` Now, we can run pandoc on our `README.md` file by including our current directory and the `README.md` file as part of the `docker container run` command: -~~~ +```source docker container run --mount type=bind,source=${PWD},target=/tmp pandoc/core /tmp/README.md -~~~ -{: .source} -~~~ +``` + +```output

<h1 id="readme-pages">readme-pages</h1>
<p>Example for generating Github.io pages from Readme with Pandoc.</p>
-~~~ -{: .output} +``` Here, the `--mount type=bind,source=${PWD},target=/tmp` flag says to take the directory at `${PWD}` and make it available inside the container as `/tmp`. Then `pandoc` can read the source file (`README.md`) and convert it to HTML. While this HTML @@ -155,43 +162,41 @@ is valid, it doesn't show the complete structure of a standalone HTML document. add the `--standalone` argument to the pandoc command. Also we can redirect the output to create a HTML file in the `build` directory. -~~~ +```source mkdir -p build docker container run --mount type=bind,source=${PWD},target=/tmp pandoc/core /tmp/README.md --standalone --output=/tmp/build/index.html -~~~ -{: .source} -~~~ +``` + +```output [WARNING] This document format requires a nonempty element. Defaulting to 'README' as the title. To specify a title, use 'title' in metadata or --metadata title="...". -~~~ -{: .output} +``` To suppress the warning message we may add the following lines at the top of the `README.md` file: -~~~ +``` --- title: Hello, Pandoc --- -~~~ +``` Or add the mentioned `--metadata title="..."` to the command line. Once we've made all of these changes, and produced the output we want, we can check it, using this command: -~~~ +```source cat build/index.html -~~~ -{: .source} -~~~ +``` + +```output <!DOCTYPE html> <html xmlns="http://www.w3.org/1999/xhtml" lang="" xml:lang=""> <head> <meta charset="utf-8" /> ... etc -~~~ -{: .output} +``` We now have tested our website deployment workflow - given the source files from Github, we can use a Docker container and command to generate our website. We now @@ -209,25 +214,23 @@ are taken immediately to a menu for creating a new one. We will skip the templat The first entry is the **name** of the workflow -~~~yaml +```source, yaml name: Deploy pages -~~~ -{: .source} +``` Next we specify **when** this workflow is run. In this case: every time content is pushed to the `main` branch -~~~yaml +```source, yaml on: push: branches: - main -~~~ -{: .source} +``` Now we tell Github **what** to do. -~~~yaml +```source, yaml jobs: deploy: # a free machine-readable name for this job runs-on: ubuntu-latest # specify the base operating system @@ -238,12 +241,11 @@ jobs: run: | # multiple Bash commands follow mkdir -p build touch build/.nojekyll -~~~ -{: .source} +``` Now for the Docker bit: -~~~yaml +```source, yaml - name: Run pandoc uses: docker://pandoc/core:2.12 # Always specify a version! with: @@ -256,8 +258,7 @@ Now for the Docker bit: with: branch: gh-pages folder: build -~~~ -{: .source} +``` We may recognize the command-line that we had previously. Notice that we don't need to specify the `--mount` flag. Github Actions arranges the Docker environment such that the files are in the correct @@ -268,16 +269,22 @@ Now we should enable Github Pages on this repository: go to the "Settings" tab a seconds the page should be up. 
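Before moving on, it may help to see the snippets above assembled into one file. The sketch below is a plausible `.github/workflows/deploy-pages.yaml` built from the fragments shown in this section; the checkout step, the exact pandoc arguments and the pinned version of the third-party deploy action are assumptions for illustration rather than a tested configuration:

```yaml
name: Deploy pages

on:
  push:
    branches:
      - main

jobs:
  deploy:                          # a free machine-readable name for this job
    runs-on: ubuntu-latest         # specify the base operating system
    steps:
      - name: Checkout the repository             # assumed: make the repository files available
        uses: actions/checkout@v2
      - name: Prepare build environment
        run: |                                    # multiple Bash commands follow
          mkdir -p build
          touch build/.nojekyll
      - name: Run pandoc
        uses: docker://pandoc/core:2.12           # always specify a version!
        with:
          args: --standalone --output=build/index.html README.md   # assumed arguments
      - name: Deploy on github pages
        uses: JamesIves/github-pages-deploy-action@4.1.0           # assumed third-party action and version
        with:
          branch: gh-pages
          folder: build
```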
# Reference material + - [Pandoc the universal document converter](https://pandoc.org) - [Documentation on GitHub Actions](https://docs.github.com/en/actions) - [GitHub Pages deploy action](https://github.com/marketplace/actions/deploy-to-github-pages) - [Pandoc action example](https://github.com/pandoc/pandoc-action-example) -{% include links.md %} + {% comment %} + <!-- LocalWords: keypoints links.md endcomment Dockerfile --> + {% endcomment %} + <!-- LocalWords: bitbucket-pipelines.yml --> + + diff --git a/_extras/e02-jekyll-lesson-example.md b/instructors/e02-jekyll-lesson-example.md similarity index 76% rename from _extras/e02-jekyll-lesson-example.md rename to instructors/e02-jekyll-lesson-example.md index 9ec9b6b5b..4cb5c7330 100644 --- a/_extras/e02-jekyll-lesson-example.md +++ b/instructors/e02-jekyll-lesson-example.md @@ -1,17 +1,19 @@ --- -title: "Using Docker with Jekyll - Containers Used in Generating this Lesson" -layout: episode +title: Using Docker with Jekyll - Containers Used in Generating this Lesson teaching: 20 exercises: 0 questions: -- "What is an example of how I might use Docker instead of installing software?" -- "How can containers be useful to me for building websites?" +- What is an example of how I might use Docker instead of installing software? +- How can containers be useful to me for building websites? objectives: -- "Use an existing container image and Docker in place of complicated software installation work." -- "Demonstrate how to construct a website using containers to transform a specification into a fully-presented website." +- Use an existing container image and Docker in place of complicated software installation + work. +- Demonstrate how to construct a website using containers to transform a specification + into a fully-presented website. keypoints: -- "You can use existing container images and Docker instead of installing additional software." -- "The generation of this lesson website can be effected using a container." +- You can use existing container images and Docker instead of installing additional + software. +- The generation of this lesson website can be effected using a container. --- As previously mentioned earlier in the lesson, containers can be helpful for @@ -24,26 +26,33 @@ This requires installing Jekyll and dependencies such as Ruby and Gemfiles to yo which can be difficult to achieve given complexities such as needing to match specific versions of the software components. Instead you could use Docker and a pre-built Jekyll container image. First we need to get a copy of the website source to work with on your computer. -In your shell window, in your `docker-intro` create a new directory `build-website` and `cd` into it. We will be expanding a ZIP file into this directory later. +In your shell window, in your `docker-intro` create a new directory `build-website` and `cd` into it. We will be expanding a ZIP file into this directory later. Now open a web browser window and: + 1. Navigate to the [GitHub repository][docker-introduction repository] that contains the files for this session; 2. Click the green "Clone or download" button on the right-hand side of the page; 3. Click "Download ZIP". 4. The downloaded ZIP file should contain one directory named `docker-introduction-gh-pages`. 5. Move the `docker-introduction-gh-pages` folder into the `build-website` folder you created above. -> ## There are many ways to work with ZIP files -> Note that the last two steps can be achieved using a Mac or Windows graphical user interface. 
There are also ways to effect expanding the ZIP archive on the command line, for example, on my Mac I can achieve the effect of those last two steps through running the command `unzip ~/Downloads/docker-introduction-gh-pages.zip`. -{: .callout} +::::::::::::::::::::::::::::::::::::::::: callout + +## There are many ways to work with ZIP files + +Note that the last two steps can be achieved using a Mac or Windows graphical user interface. There are also ways to effect expanding the ZIP archive on the command line, for example, on my Mac I can achieve the effect of those last two steps through running the command `unzip ~/Downloads/docker-introduction-gh-pages.zip`. + + +:::::::::::::::::::::::::::::::::::::::::::::::::: In your shell window, if you `cd` into the `docker-introduction-gh-pages` folder and list the files, you should see something similar to what I see: -~~~ + +```bash $ cd docker-introduction-gh-pages $ ls -~~~ -{: .language-bash} -~~~ +``` + +```output AUTHORS _episodes code CITATION _episodes_rmd data CODE_OF_CONDUCT.md _extras fig @@ -52,19 +61,19 @@ LICENSE.md _layouts index.md Makefile aio.md reference.md README.md assets setup.md _config.yml bin -~~~ -{: .output} +``` -You can now request that a container is created that will compile the files in this set into the lesson website, and will run a simple webserver to allow you to view your version of the website locally. Note that this command will be long and fiddly to type, so you probably want to copy-and-paste it into your shell window. This command will continue to (re-)generate and serve up your version of the lesson website, so you will not get your shell prompt back until you type <kbd>control</kbd>+<kbd>c</kbd>. This will stop the webserver, since it cleans away the container. +You can now request that a container is created that will compile the files in this set into the lesson website, and will run a simple webserver to allow you to view your version of the website locally. Note that this command will be long and fiddly to type, so you probably want to copy-and-paste it into your shell window. This command will continue to (re-)generate and serve up your version of the lesson website, so you will not get your shell prompt back until you type <kbd>control</kbd>\+<kbd>c</kbd>. This will stop the webserver, since it cleans away the container. For macOS, Linux and PowerShell: -~~~ + +```bash $ docker container run --rm -it --mount type=bind,source=${PWD},target=/srv/jekyll -p 127.0.0.1:4000:4000 jekyll/jekyll:3 jekyll serve -~~~ -{: .language-bash} +``` When I ran the macOS command, the output was as follows: -~~~ + +```output Unable to find image 'jekyll/jekyll:3' locally 3: Pulling from jekyll/jekyll 9d48c3bd43c5: Pull complete @@ -90,12 +99,11 @@ To use retry middleware with Faraday v2.0+, install `faraday-retry` gem Auto-regeneration: enabled for '/srv/jekyll' Server address: http://0.0.0.0:4000 Server running... press ctrl-c to stop. -~~~ -{: .output} +``` In the preceding output, you see Docker downloading the container image for Jekyll, which is a tool for building websites from specification files such as those used for this lesson. The line `jekyll serve` indicates a command that runs within the Docker container instance. The output below that is from the Jekyll tool itself, highlighting that the website has been built, and indicating that there is a server running. -Open a web browser window and visit the address <http://localhost:4000/>. 
You should see a site that looks very similar to that at <https://carpentries-incubator.github.io/docker-introduction/>.
+Open a web browser window and visit the address [http://localhost:4000/](http://localhost:4000/). You should see a site that looks very similar to that at [https://carpentries-incubator.github.io/docker-introduction/](https://carpentries-incubator.github.io/docker-introduction/).

Using a new shell window, or using your laptop's GUI, locate the file `index.md` within the `docker-introduction-gh-pages` directory, and open it in your preferred editor program.

@@ -103,21 +111,29 @@ Near the top of this file you should see the description starting "This session

If you reload your web browser, the change that you just made should be visible. This is because the
Jekyll container saw that you changed the `index.md` file, and regenerated the website.

-You can stop the Jekyll container by clicking in its terminal window and typing <kbd>control</kbd>+<kbd>c</kbd>.
+You can stop the Jekyll container by clicking in its terminal window and typing <kbd>control</kbd>\+<kbd>c</kbd>.

You have now achieved using a reproducible computational environment to reproduce a lesson about reproducible computing environments.

-{% include links.md %}
+
{% comment %}
+
<!-- LocalWords: keypoints _episodes_rmd CODE_OF_CONDUCT.md aio.md -->
+
<!-- LocalWords: CONTRIBUTING.md LICENSE.md index.md reference.md -->
+
<!-- LocalWords: README.md setup.md _config.yml webserver srv -->
+
<!-- LocalWords: jekyll x86_64-linux-musl favicons github.io -->
+
<!-- LocalWords: links.md _episodes_rmd _config.yml endcomment -->
+
{% endcomment %}
+
+

diff --git a/_extras/guide.md b/instructors/instructor-notes.md
similarity index 55%
rename from _extras/guide.md
rename to instructors/instructor-notes.md
index a7e6dbefd..5c2ebf70b 100644
--- a/_extras/guide.md
+++ b/instructors/instructor-notes.md
@@ -1,5 +1,5 @@
---
-title: "Instructor Notes"
+title: Instructor Notes
---

## Before Teaching This Lesson

@@ -13,8 +13,8 @@ In particular, there can be differences between macOS, Windows and Linux
platforms. Updates and changes introduced in Docker releases are highlighted
in the [Docker release notes][Docker release notes].

-_You are strongly advised to run through the lesson content prior to teaching
-the lesson to ensure that everything works as expected._
+*You are strongly advised to run through the lesson content prior to teaching
+the lesson to ensure that everything works as expected.*

If you experience any issues, please [open an issue][open a lesson issue] in the lesson
repository describing the problem and platform(s) affected. The lesson maintainers will
@@ -24,32 +24,32 @@ expected with the lesson content.

## Miscellaneous Tips

-* **Timing**: With all the lesson episodes taken together, there's way more than three hours of material in this lesson.
-Focusing on the earlier episodes (Introduction through the first half
-of Creating Container Images) will take just about three hours if you
-also include a brief general introduction and time to check your learners'
-software installations.
-* **Install Issues**: From the feedback we have received about past lessons, computers running
-Microsoft Windows have encountered the largest number of challenges setting up Docker.
-Consider having people check their install in advance at a separate time or come early.
-In online workshops, consider using your video conferencing software's "breakout room" functionality -to form smaller groups within which participants can troubleshoot their installations. -Note that you should use a more complex command than `docker --version` to test the installation, as the -simplest `docker` commands to not connect to the Docker backend. -* **Virtualization Illustration**: When going through the intro to containers, -consider demonstrating what this might look like by having two shells (or shell tabs) -open, one on your host computer and one into a container you started before the -workshop. Then you can demonstrate in a simple way that from the same (host) computer, -you can access two different types of environments -- one via the shell on your -host computer and one via the shell into a running container. Sample commands could include: - * `whoami` - * `pwd` and `ls` - * something that shows the OS. On mac, this could be `sw_vers`, on linux `cat /etc/os-release` -* **Reflection Exercise**: At the beginning and end of the workshop, give participants time to -reflect on what they want to get out of the workshop (at the beginning) and what they -can apply to their work (at the end). Using the shared notes doc is a great way to -do this and a good way to make sure that you've addressed specific concerns or goals -of the participants. +- **Timing**: With all the lesson episodes taken together, there's way more than three hours of material in this lesson. + Focusing on the earlier episodes (Introduction through the first half + of Creating Container Images) will take just about three hours if you + also include a brief general introduction and time to check your learners' + software installations. +- **Install Issues**: From the feedback we have received about past lessons, computers running + Microsoft Windows have encountered the largest number of challenges setting up Docker. + Consider having people check their install in advance at a separate time or come early. + In online workshops, consider using your video conferencing software's "breakout room" functionality + to form smaller groups within which participants can troubleshoot their installations. + Note that you should use a more complex command than `docker --version` to test the installation, as the + simplest `docker` commands to not connect to the Docker backend. +- **Virtualization Illustration**: When going through the intro to containers, + consider demonstrating what this might look like by having two shells (or shell tabs) + open, one on your host computer and one into a container you started before the + workshop. Then you can demonstrate in a simple way that from the same (host) computer, + you can access two different types of environments -- one via the shell on your + host computer and one via the shell into a running container. Sample commands could include: + - `whoami` + - `pwd` and `ls` + - something that shows the OS. On mac, this could be `sw_vers`, on linux `cat /etc/os-release` +- **Reflection Exercise**: At the beginning and end of the workshop, give participants time to + reflect on what they want to get out of the workshop (at the beginning) and what they + can apply to their work (at the end). Using the shared notes doc is a great way to + do this and a good way to make sure that you've addressed specific concerns or goals + of the participants. ## Learner Profiles and Pathways @@ -72,7 +72,7 @@ that you may find among learners engaging with this material. 
With these profiles, we aim to encourage you to think about the learners attending your workshop(s) and which episodes it may be most useful to teach. -**_Nelson is a graduate student in microbiology._** They have experience in running Unix shell +***Nelson is a graduate student in microbiology.*** They have experience in running Unix shell commands and using libraries in R for the bioinformatics workflows they have developed. They are expanding their analysis to run on 3000 genomes in 200 samples and they have started to use the local cluster to run their workflows. The local research computing @@ -82,22 +82,22 @@ have made so they want to learn how to use Docker. They would also be interested creating their own Docker images for other lab members and collaborators to re-use their workflows. -**_Caitlin is a second year undergraduate in computer science examining Docker for the first -time._** She has heard about Docker but does not really know what it achieves or why it is +***Caitlin is a second year undergraduate in computer science examining Docker for the first +time.*** She has heard about Docker but does not really know what it achieves or why it is useful. She is reasonably confident in using the Unix shell, having used it briefly in her first year modules. She is keen to find jump-off points to learn more about technical details and alternative technologies that are also popular, having heard that container technologies are widely used within industry. -**_Xu, a materials science researcher, wants to package her software for release with -a paper to help ensure reproducibility._** She has written some code that makes use of a +***Xu, a materials science researcher, wants to package her software for release with +a paper to help ensure reproducibility.*** She has written some code that makes use of a series of Python libraries to undertake analysis of a compound. She wants to (or is required to) make her software available as part of the paper submission. She understands why Docker is important in helping to ensure reproducibility but not the process and low-level detail of preparing a container and archiving it to obtain a DOI for inclusion with the paper submission. -**_Bronwyn is a PhD student running Python/R scripts on her local laptop/workstation._** +***Bronwyn is a PhD student running Python/R scripts on her local laptop/workstation.*** She is having difficulty getting all the tools she needs to work because of conflicting dependencies and little experience with package managers. She is also keen to reduce the overhead of managing software so she can get on with her thesis research. She has @@ -106,8 +106,8 @@ this on her own and does not have access to any expertise in this within her loc research group. She currently wants to know how to use preexisting Docker containers but may need to create her own containers in the future. -**_Virat is a grad student who is running an obscure bioinformatics tool (from a GitHub -repo) that depends on a number of other tools that need to be pre-installed ._** He wants to be able to +***Virat is a grad student who is running an obscure bioinformatics tool (from a GitHub +repo) that depends on a number of other tools that need to be pre-installed .*** He wants to be able to run on multiple resources and have his undergrad assistant use the same tools. 
Virat has command line experience and has struggled his way through complex installations but he has no formal CS background - he only knows to use containers because a departmental @@ -123,32 +123,32 @@ we hope that highlighting these groups helps to provide an example of the different types of skills and expertise that learners engaging with this material may have: - - **Researchers:** For researchers, even those based in non-computational domains, software - is an increasingly important element of their day-to-day work. Whether they are writing - code or installing, configuring and/or running software to support their research, they - will eventually need to deal with the complexities of running software on different - platforms, handling complex software dependencies and potentially submitting their code and data to - repositories to support the reproduction of research outputs by other researchers, or to - meet the requirements of publishers or funders. Software container technologies are valuable - to help researchers address these challenges. +- **Researchers:** For researchers, even those based in non-computational domains, software + is an increasingly important element of their day-to-day work. Whether they are writing + code or installing, configuring and/or running software to support their research, they + will eventually need to deal with the complexities of running software on different + platforms, handling complex software dependencies and potentially submitting their code and data to + repositories to support the reproduction of research outputs by other researchers, or to + meet the requirements of publishers or funders. Software container technologies are valuable + to help researchers address these challenges. - **RSEs:** RSEs -- Research Software Engineers -- provide software development, training -and technical guidance to support the development of reliable, maintainable, sustainable -research software. They will generally have extensive technical skills but they may not -have experience of working with or managing software containers. In addition to working with -researchers to help build and package software, they are likely to be interested in how -containers can help to support best practices for the development of research software -and aspects such as software deployment. - - - **Systems professionals:** Systems professionals represent the more technical end of -our spectrum of learners. They may be based within a central IT services environment -within a research institution or within individual departments or research groups. -Their work is likely to encompass supporting researchers with effective use of -infrastructure and they are likely to need to know about managing and orchestrating -multiple containers in more complex environments. For example, they may need to provide -database servers, web application servers and other services that can be deployed -in containerized environments to support more straightforward management, maintenance -and upgradeability. + and technical guidance to support the development of reliable, maintainable, sustainable + research software. They will generally have extensive technical skills but they may not + have experience of working with or managing software containers. In addition to working with + researchers to help build and package software, they are likely to be interested in how + containers can help to support best practices for the development of research software + and aspects such as software deployment. 
+
+- **Systems professionals:** Systems professionals represent the more technical end of
+  our spectrum of learners. They may be based within a central IT services environment
+  within a research institution or within individual departments or research groups.
+  Their work is likely to encompass supporting researchers with effective use of
+  infrastructure and they are likely to need to know about managing and orchestrating
+  multiple containers in more complex environments. For example, they may need to provide
+  database servers, web application servers and other services that can be deployed
+  in containerized environments to support more straightforward management, maintenance
+  and upgradeability.

### Learner Pathways

@@ -166,7 +166,7 @@ to use them.

Moving beyond the core features there are a number of topics that are likely
to only be of interest to different sub-groups of learners. To support these
-different groups of learners we have developed a set of "_learner pathways_"
+different groups of learners we have developed a set of "*learner pathways*"
that provide suggested routes through the material based on different use
cases or areas of interest.

@@ -178,44 +178,48 @@ consider.

Each pathway will have a slightly different emphasis on specific sets of
topics. We highlight different learner profiles that we believe map well to
specific pathways.

-_Note that the material in this lesson continues to develop and experience
+*Note that the material in this lesson continues to develop and experience
of teaching the material is increasing. In due course we intend to offer more
detailed pathway information including specific episode schedules that we
-think are most suited to the pathways highlighted._
+think are most suited to the pathways highlighted.*

**Core content:** The Docker lesson contains a set of core content that we
expect to be relevant for all learner pathways.
This includes: - - Introducing container concepts and the Docker software - - Running through the basic use of Docker including: - - Core commands for listing and managing images and containers - - Obtaining container images from Docker Hub - - Running containers from container images - - Building container images - +- Introducing container concepts and the Docker software +- Running through the basic use of Docker including: + - Core commands for listing and managing images and containers + - Obtaining container images from Docker Hub + - Running containers from container images + - Building container images + Beyond this, different pathways offer scope to bring in different episodes containing different lesson content to support different target audiences or areas of interest Some suggested pathways include: - - **Reproducible research** - - _Common learner profiles:_ Researcher; RSE - - - **Cloud computing** - - _Common learner profiles:_ Sytems professional, RSE +- **Reproducible research** + + - *Common learner profiles:* Researcher; RSE - - **High performance computing** - - _Common learner profiles:_ Researcher; RSE; Systems professional +- **Cloud computing** + + - *Common learner profiles:* Sytems professional, RSE +- **High performance computing** + + - *Common learner profiles:* Researcher; RSE; Systems professional ## Common Points of Confusion -* difference between a container and container image -* what it means for a container to be stopped (but not removed) -* differences in container behaviour between hosts that are running Linux compared to hosts running macOS or Microsoft Windows - * on Linux hosts there is usually only one OS kernel shared between the host and the containers, so less separation than is typical when using macOS or Windows hosts. This can lead to effects such as volume mounts behaving differently, e.g., regarding filesystem permissions, user and group mappings between the host and the container. +- difference between a container and container image +- what it means for a container to be stopped (but not removed) +- differences in container behaviour between hosts that are running Linux compared to hosts running macOS or Microsoft Windows + - on Linux hosts there is usually only one OS kernel shared between the host and the containers, so less separation than is typical when using macOS or Windows hosts. This can lead to effects such as volume mounts behaving differently, e.g., regarding filesystem permissions, user and group mappings between the host and the container. + + + -{% include links.md %} diff --git a/_extras/discuss.md b/learners/discuss.md similarity index 58% rename from _extras/discuss.md rename to learners/discuss.md index bfc33c504..515e3baf1 100644 --- a/_extras/discuss.md +++ b/learners/discuss.md @@ -1,6 +1,9 @@ --- title: Discussion --- + FIXME -{% include links.md %} + + + diff --git a/reference.md b/learners/reference.md similarity index 94% rename from reference.md rename to learners/reference.md index 6b55b73e9..925f6631d 100644 --- a/reference.md +++ b/learners/reference.md @@ -1,5 +1,5 @@ --- -layout: reference +title: 'Glossary' --- ## Glossary @@ -24,7 +24,7 @@ layout: reference <dt>Docker</dt> <dd>A software framework for creating, running and managing <em>containers</em>.</dd> <dt>Docker build context</dt> - <dd>The docker build command builds Docker images from a Dockerfile and a “context”. 
A build's context is the set of files located in the specified PATH or URL.</dd> + <dd>The docker build command builds Docker images from a Dockerfile and a "context". A build's context is the set of files located in the specified PATH or URL.</dd> <dt>Docker Hub</dt> <dd>An online library of Docker <em>container images</em>.</dd> <dt>Docker Hub repository</dt> @@ -54,7 +54,9 @@ layout: reference <dt>Tar archive</dt> <dd>A file archive format commonly used in Unix-like operating systems that combines multiple files into a single file. tar archive files are used as the export format of <em>Docker images</em>.</dd> <dt>Virtualization</dt> - <dd><em>Containers</em> are an example of virtualization – having a second “virtual” computer running and accessible from a <em>host computer</em>.</dd> + <dd><em>Containers</em> are an example of virtualization – having a second "virtual" computer running and accessible from a <em>host computer</em>.</dd> </dl> -{% include links.md %} + + + diff --git a/setup.md b/learners/setup.md similarity index 73% rename from setup.md rename to learners/setup.md index 923a5d5b5..8e8e8e88b 100644 --- a/setup.md +++ b/learners/setup.md @@ -1,17 +1,21 @@ --- title: Setup --- + ### Website accounts to create + Please seek help at the start of the lesson if you have not been able to establish a website account on: -- The [Docker Hub](http://hub.docker.com). We will use the Docker Hub to download pre-built container images, and for you to upload and download container images that you create, as explained in the relevant lesson episodes. + +- The [Docker Hub](https://hub.docker.com). We will use the Docker Hub to download pre-built container images, and for you to upload and download container images that you create, as explained in the relevant lesson episodes. ### Files to download -Download the [`docker-intro.zip`]({{ page.root }}/files/docker-intro.zip) file. _This file can alternatively be downloaded from the `files` directory in the [docker-introduction GitHub repository][docker-introduction repository]_. +Download the [`docker-intro.zip`](files/docker-intro.zip) file. *This file can alternatively be downloaded from the `files` directory in the [docker-introduction GitHub repository][docker-introduction repository]*. -Move the downloaded file to your Desktop and unzip it. It should unzip to a folder called `docker-intro`. +Move the downloaded file to your Desktop and unzip it. It should unzip to a folder called `docker-intro`. ### Software to install + Docker's installation experience has steadily improved, however situations will arise in which installing Docker on your computer may not be straightforward unless you have a large amount of technical experience. Workshops try to have helpers on hand that have worked their way through the install process, but do be prepared for some troubleshooting. @@ -27,25 +31,34 @@ Ideally, you will be able to install the Docker Desktop software, following the Note that the above installation instructions highlight a minimum version or "build" that is required to be able to install Docker on your Windows 10 system. See [Which version of Windows operating system am I running?](https://support.microsoft.com/en-us/windows/which-version-of-windows-operating-system-am-i-running-628bec99-476a-2c13-5296-9dd081cdd808) for details of how to find out which version/build of Windows 10 you have. 
-If you are unable to follow the above instructions to install Docker Desktop on your Windows system, the final release of the deprecated Docker Toolbox version of Docker for Windows can be downloaded from the [releases page of the Docker Toolbox GitHub repository](https://github.com/docker/toolbox/releases). (Download the `.exe` file for the Windows installer). _Please note that this final release of Docker Toolbox includes an old version of Docker and you are strongly advised not to attempt to use this for any production use. It will, however, enable you to follow along with the lesson material._ - -> ## Warning: Git Bash -> If you are using Git Bash as your terminal on Windows then you should be aware that you may run -> into issues running some of the commands in this lesson as Git Bash will automatically re-write -> any paths you specify at the command line into Windows versions of the paths and this will confuse -> the Docker container you are trying to use. For example, if you enter the command: -> ``` -> docker run alpine cat /etc/os-release -> ``` -> Git Bash will change the `/etc/os-release` path to `C:\etc\os-release\` before passing the command -> to the Docker container and the container will report an error. If you want to use Git Bash then you -> can request that this path translation does not take place by adding an extra `/` to the start of the -> path. i.e. the command would become: -> ``` -> docker run alpine cat //etc/os-release -> ``` -> This should suppress the path translation functionality in Git Bash. -{: .callout} +If you are unable to follow the above instructions to install Docker Desktop on your Windows system, the final release of the deprecated Docker Toolbox version of Docker for Windows can be downloaded from the [releases page of the Docker Toolbox GitHub repository](https://github.com/docker/toolbox/releases). (Download the `.exe` file for the Windows installer). *Please note that this final release of Docker Toolbox includes an old version of Docker and you are strongly advised not to attempt to use this for any production use. It will, however, enable you to follow along with the lesson material.* + +::::::::::::::::::::::::::::::::::::::::: callout + +## Warning: Git Bash + +If you are using Git Bash as your terminal on Windows then you should be aware that you may run +into issues running some of the commands in this lesson as Git Bash will automatically re-write +any paths you specify at the command line into Windows versions of the paths and this will confuse +the Docker container you are trying to use. For example, if you enter the command: + +``` +docker run alpine cat /etc/os-release +``` + +Git Bash will change the `/etc/os-release` path to `C:\etc\os-release\` before passing the command +to the Docker container and the container will report an error. If you want to use Git Bash then you +can request that this path translation does not take place by adding an extra `/` to the start of the +path. i.e. the command would become: + +``` +docker run alpine cat //etc/os-release +``` + +This should suppress the path translation functionality in Git Bash. + + +:::::::::::::::::::::::::::::::::::::::::::::::::: #### Apple macOS @@ -66,19 +79,20 @@ we have not recently tested the Docker installation process via MacPorts. There are too many varieties of Linux to give precise instructions here, but hopefully you can locate documentation for getting Docker installed on your Linux distribution. It may already be installed. 
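If you want to check quickly whether it is already there, one way (assuming you have a terminal open) is to ask the Docker client for its version:

```bash
# If Docker is already installed this prints a version string;
# otherwise your shell will report that the command was not found.
docker --version
```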
If it is not already installed on your system, the [Install Docker Engine](https://docs.docker.com/engine/install/) page provides an overview of supported Linux distributions and pointers to relevant installation information. Alternatively, see:

- - [Docker Engine on CentOS](https://docs.docker.com/install/linux/docker-ce/centos/)
- - [Docker Engine on Debian](https://docs.docker.com/install/linux/docker-ce/debian/)
- - [Docker Engine on Fedora](https://docs.docker.com/install/linux/docker-ce/fedora/)
- - [Docker Engine on Ubuntu](https://docs.docker.com/install/linux/docker-ce/ubuntu/)
+- [Docker Engine on CentOS](https://docs.docker.com/install/linux/docker-ce/centos/)
+- [Docker Engine on Debian](https://docs.docker.com/install/linux/docker-ce/debian/)
+- [Docker Engine on Fedora](https://docs.docker.com/install/linux/docker-ce/fedora/)
+- [Docker Engine on Ubuntu](https://docs.docker.com/install/linux/docker-ce/ubuntu/)

### Verify Installation

To quickly check if the Docker client and server are working, run the following command in a new terminal or ssh session:
-~~~
+
+```bash
$ docker version
-~~~
-{: .language-bash}
-~~~
+
+```output
Client:
 Version: 20.10.2
 API version: 1.41
@@ -107,35 +121,42 @@ Server:
 docker-init:
  Version: 0.19.0
  GitCommit:
-~~~
-{: .output}
+```

The above output shows a successful installation and will vary based on your system. The important part is that the "Client" and the "Server" parts are both working and return information. It is beyond the scope of this document to debug installation problems but common errors include the user not belonging to the `docker` group and forgetting to start a new terminal or ssh session.

### A quick tutorial on copy/pasting file contents from episodes of the lesson
+
Let's say you want to copy text off the lesson website and paste it into a file named `myfile` in the current working directory of a shell window. This can be achieved in many ways, depending on your computer's operating system, but routes I have found work for me:
+
- macOS and Linux: you are likely to have the `nano` editor installed, which provides you with a very straightforward way to create such a file, just run `nano myfile`, then paste text into the shell window, and press <kbd>control</kbd>\+<kbd>x</kbd> to exit: you will be prompted whether you want to save changes to the file, and you can type <kbd>y</kbd> to say "yes".
- Microsoft Windows running `cmd.exe` shells:
  - `del myfile` to remove `myfile` if it already existed;
  - `copy con myfile` to mean what's typed in your shell window is copied into `myfile`;
  - paste the text you want within `myfile` into the shell window;
-  - type <kbd>control</kbd>+<kbd>z</kbd> and then press <kbd>enter</kbd> to finish copying content into `myfile` and return to your shell;
+  - type <kbd>control</kbd>\+<kbd>z</kbd> and then press <kbd>enter</kbd> to finish copying content into `myfile` and return to your shell;
  - you can run the command `type myfile` to check the content of that file, as a double-check.
- Microsoft Windows running PowerShell: - The `cmd.exe` method probably works, but another is to paste your file contents into a so-called "here-string" between `@'` and `'@` as in this example that follows (the ">" is the prompt indicator): + + ``` + > @' + Some hypothetical + file content that is + + split over many + + lines. + '@ | Set-Content myfile -encoding ascii + ``` - > @' - Some hypothetical - file content that is - - split over many - lines. - '@ | Set-Content myfile -encoding ascii - -{% include links.md %} {% comment %} + <!-- LocalWords: myfile kbd links.md md endcomment --> + {% endcomment %} + + diff --git a/profiles/learner-profiles.md b/profiles/learner-profiles.md new file mode 100644 index 000000000..434e335aa --- /dev/null +++ b/profiles/learner-profiles.md @@ -0,0 +1,5 @@ +--- +title: FIXME +--- + +This is a placeholder file. Please add content here. diff --git a/site/README.md b/site/README.md new file mode 100644 index 000000000..42997e3d0 --- /dev/null +++ b/site/README.md @@ -0,0 +1,2 @@ +This directory contains rendered lesson materials. Please do not edit files +here.