Skip to content

Commit

Permalink
feat: upgrade julia to latest LTS version (#1966)
Browse files Browse the repository at this point in the history
* feat: upgrade julia to latest LTS version

Signed-off-by: Keming <[email protected]>

* update ci trigger

Signed-off-by: Keming <[email protected]>

* fix typo

Signed-off-by: Keming <[email protected]>

---------

Signed-off-by: Keming <[email protected]>
  • Loading branch information
kemingy authored Feb 2, 2025
1 parent 1519667 commit 80d77e8
Show file tree
Hide file tree
Showing 6 changed files with 121 additions and 139 deletions.
6 changes: 5 additions & 1 deletion .github/workflows/CI.yml
Original file line number Diff line number Diff line change
Expand Up @@ -9,12 +9,16 @@ on:
- '**.go'
- 'Makefile'
- 'go.**'
- 'pkg/**/*.sh'
- 'pkg/**/*.envd'
pull_request:
paths:
- '.github/workflows/CI.yml'
- '**.go'
- 'Makefile'
- 'go.**'
- 'pkg/**/*.sh'
- 'pkg/**/*.envd'
merge_group:
workflow_dispatch:

Expand Down Expand Up @@ -51,7 +55,7 @@ jobs:
args: --timeout=5m
version: latest
# Ref https://github.com/golangci/golangci-lint-action/issues/244
skip-pkg-cache: true
skip-cache: true
test:
name: test
env:
Expand Down
109 changes: 20 additions & 89 deletions .github/workflows/nightly.yml
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,23 @@ jobs:
args: --timeout=5m
version: latest
# Ref https://github.com/golangci/golangci-lint-action/issues/244
skip-pkg-cache: true
skip-cache: true
build:
name: build
if: github.repository == 'tensorchord/envd'
strategy:
matrix:
os: [ ubuntu-latest, macos-latest ]
runs-on: ${{ matrix.os }}
steps:
- name: Check out code
uses: actions/checkout@v4
- name: Setup Go
uses: actions/setup-go@v5
with:
go-version: 'stable'
- name: Build
run: make
test:
name: test
if: github.repository == 'tensorchord/envd'
Expand All @@ -54,21 +70,13 @@ jobs:
git diff --cached --exit-code || (echo 'Please run "make generate" to verify generate' && exit 1);
- name: Test
run: make test
- name: Upload coverage report
uses: actions/upload-artifact@v4
with:
name: coverage-out
path: coverage.out
e2e-cli:
name: e2e-cli
if: github.repository == 'tensorchord/envd'
env:
# Disable telemetry.
ENVD_ANALYTICS: false
strategy:
matrix:
os: [ ubuntu-latest ]
runs-on: ${{ matrix.os }}
runs-on: ubuntu-22.04
steps:
- name: Check out code
uses: actions/checkout@v4
Expand All @@ -82,21 +90,13 @@ jobs:
run: make e2e-cli-test
env:
GIT_LATEST_TAG: ${{ steps.get-latest-tag.outputs.tag }}
- name: Upload coverage report
uses: actions/upload-artifact@v4
with:
name: e2e-cli-coverage-out
path: e2e-cli-coverage.out
e2e-lang:
name: e2e-lang
if: github.repository == 'tensorchord/envd'
env:
# Disable telemetry.
ENVD_ANALYTICS: false
strategy:
matrix:
os: [ ubuntu-latest ]
runs-on: ${{ matrix.os }}
runs-on: ubuntu-22.04
steps:
- name: Check out code
uses: actions/checkout@v4
Expand All @@ -110,77 +110,13 @@ jobs:
run: make e2e-lang-test
env:
GIT_LATEST_TAG: ${{ steps.get-latest-tag.outputs.tag }}
- name: Upload coverage report
uses: actions/upload-artifact@v4
with:
name: e2e-lang-coverage-out
path: e2e-lang-coverage.out
# notifies that all test jobs are finished.
report:
if: github.repository == 'tensorchord/envd'
needs:
- test
- e2e-cli
- e2e-lang
runs-on: ubuntu-latest
steps:
- name: Check out code
uses: actions/checkout@v4
- name: Setup Go
uses: actions/setup-go@v5
with:
go-version: 'stable'
- name: Install bins
run: |
go install github.com/mattn/goveralls@latest
go install github.com/wadey/gocovmerge@latest
- name: Get coverage report
uses: actions/download-artifact@v4
with:
name: coverage-out
path: coverage.out
- name: Get cli e2e coverage report
uses: actions/download-artifact@v4
with:
name: e2e-cli-coverage-out
path: e2e-cli-coverage.out
- name: Get language e2e coverage report
uses: actions/download-artifact@v4
with:
name: e2e-lang-coverage-out
path: e2e-lang-coverage.out
# - name: Send coverage
# env:
# COVERALLS_TOKEN: ${{ secrets.GITHUB_TOKEN }}
# run: |
# gocovmerge e2e-coverage.out coverage.out > final.out
# goveralls -coverprofile=final.out -service=github
build:
name: build
if: github.repository == 'tensorchord/envd'
strategy:
matrix:
os: [ ubuntu-latest, macos-latest ]
runs-on: ${{ matrix.os }}
steps:
- name: Check out code
uses: actions/checkout@v4
- name: Setup Go
uses: actions/setup-go@v5
with:
go-version: 'stable'
- name: Build
run: make
e2e-doc:
name: e2e-doc
if: github.repository == 'tensorchord/envd'
env:
# Disable telemetry.
ENVD_ANALYTICS: false
strategy:
matrix:
os: [ubuntu-latest]
runs-on: ${{ matrix.os }}
runs-on: ubuntu-22.04
steps:
- name: Check out code
uses: actions/checkout@v4
Expand All @@ -194,8 +130,3 @@ jobs:
run: make e2e-doc-test
env:
GIT_LATEST_TAG: ${{ steps.get-latest-tag.outputs.tag }}
- name: Upload coverage report
uses: actions/upload-artifact@v4
with:
name: e2e-doc-coverage-out
path: e2e-doc-coverage.out
4 changes: 2 additions & 2 deletions e2e/docs/testdata/julia_mnist/build.envd
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
# syntax=v1

def build():
base(dev=True)

install.julia()
install.julia_packages(name=["Flux", "MLDatasets"])
runtime.command(commands={"julia-mnist": "julia mnist.jl"})
runtime.command(commands={"julia-mnist": "julia mlp_mnist.jl"})
85 changes: 85 additions & 0 deletions e2e/docs/testdata/julia_mnist/mlp_mnist.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,85 @@
# https://github.com/FluxML/model-zoo/blob/master/vision/mlp_mnist/mlp_mnist.jl
# License: MIT Copyright (c) 2017 by Flux contributors

# Simple multi-layer perceptron, for the MNIST hand-written digits.
# This example does not use a GPU, it's small enough not to need one.

using Flux, MLDatasets, Statistics

# Our model is very simple: Its one "hidden layer" has 32 "neurons" each connected to every input pixel.
# Each has a sigmoid nonlinearity, and is connected to every "neuron" in the output layer.
# Finally, softmax produces probabilities, i.e. positive numbers which add up to 1:
# Architecture: 784 flattened pixels -> 32 sigmoid units -> 10 logits -> softmax probabilities.
model = Chain(Dense(28^2 => 32, sigmoid), Dense(32 => 10), softmax)

#===== DATA =====#

# Calling MLDatasets.MNIST() will download the dataset if necessary,
# and return a struct containing it.
# It takes a few seconds to read from disk each time, so do this once:

train_data = MLDatasets.MNIST() # i.e. split=:train
test_data = MLDatasets.MNIST(split=:test)

# train_data.features is a 28×28×60000 Array{Float32, 3} of the images.
# We need a 2D array for our model. Let's combine the reshape needed with
# other pre-processing, in a function:

"""
    simple_loader(data::MNIST; batchsize::Int=64)

Flatten each 28×28 image in `data` into a length-784 column, one-hot encode
the digit labels over `0:9`, and return a shuffling `Flux.DataLoader` over
the pair, yielding minibatches of `batchsize` columns.
"""
function simple_loader(data::MNIST; batchsize::Int=64)
    flat = reshape(data.features, 28^2, :)
    onehot = Flux.onehotbatch(data.targets, 0:9)
    return Flux.DataLoader((flat, onehot); batchsize, shuffle=true)
end

# train_data.targets is a 60000-element Vector{Int}, of labels from 0 to 9.
# Flux.onehotbatch([0,1,9], 0:9) makes a matrix of 0 and 1.

simple_loader(train_data) # returns a DataLoader, with first element a tuple like this:

x1, y1 = first(simple_loader(train_data)); # (784×64 Matrix{Float32}, 10×64 OneHotMatrix)

model(x1) # x1 is the right shape for our model

y1 # y1 is the same shape as the model output.

# @show Flux.crossentropy(model(x1), y1); # This will be our loss function

#===== ACCURACY =====#

# We're going to log accuracy and loss during training. There's no advantage to
# calculating these on minibatches, since MNIST is small enough to do it at once.

"""
    simple_accuracy(model, data::MNIST=test_data)

Evaluate `model` on the whole of `data` in one big batch and return the
percentage of correctly classified digits, rounded to two decimal places.
"""
function simple_accuracy(model, data::MNIST=test_data)
    # One batch covering the entire dataset — MNIST is small enough.
    x, y = only(simple_loader(data; batchsize=length(data)))
    predicted = Flux.onecold(model(x))
    actual = Flux.onecold(y)
    return round(100 * mean(predicted .== actual); digits=2)
end

# @show simple_accuracy(model); # accuracy about 10%, on training data, before training!

#===== TRAINING =====#

# Make a dataloader using the desired batchsize:

train_loader = simple_loader(train_data, batchsize = 256)

# Initialise storage needed for the Adam optimiser, with our chosen learning rate:

opt_state = Flux.setup(Adam(3e-4), model);

# Then train for 10 epochs, printing out details as we go:

for epoch in 1:10
    # Mean training loss over this epoch, for logging.
    loss = 0.0
    for (x, y) in train_loader
        # Compute the loss and the gradients:
        l, gs = Flux.withgradient(m -> Flux.crossentropy(m(x), y), model)
        # Update the model parameters (and the Adam momenta):
        Flux.update!(opt_state, model, gs[1])
        # Accumulate the mean loss, just for logging:
        loss += l / length(train_loader)
    end
    # Report progress; previously `loss` was accumulated but never used.
    println("epoch $epoch: mean training loss = $loss")
end

print(simple_accuracy(model, test_data))
43 changes: 0 additions & 43 deletions e2e/docs/testdata/julia_mnist/mnist.jl

This file was deleted.

13 changes: 9 additions & 4 deletions pkg/lang/ir/v1/julia.sh
Original file line number Diff line number Diff line change
@@ -1,17 +1,22 @@
set -o pipefail && \
JULIA_URL="https://julialang-s3.julialang.org/bin/linux/x64/1.8/julia-1.8.5-linux-x86_64.tar.gz"; \
SHA256SUM="e71a24816e8fe9d5f4807664cbbb42738f5aa9fe05397d35c81d4c5d649b9d05"; \
UNAME_M="$(uname -m)" && \
# Select the Julia 1.10 LTS tarball matching the machine architecture.
if [ "${UNAME_M}" = "x86_64" ]; then \
	JULIA_URL="https://julialang-s3.julialang.org/bin/linux/x64/1.10/julia-1.10.8-linux-x86_64.tar.gz"; \
	SHA256SUM="0410175aeec3df63173c15187f2083f179d40596d36fd3a57819cc5f522ae735"; \
# BUG FIX: was `"{UNAME_M}"` (missing `$`), so the literal string "{UNAME_M}"
# was compared and the aarch64 branch could never be taken.
elif [ "${UNAME_M}" = "aarch64" ]; then \
	JULIA_URL="https://julialang-s3.julialang.org/bin/linux/aarch64/1.10/julia-1.10.8-linux-aarch64.tar.gz" \
	SHA256SUM="8d63dd12595a08edc736be8d6c4fea1840f137b81c62079d970dbd1be448b8cd"; \
fi && \

wget "${JULIA_URL}" -O /tmp/julia.tar.gz && \
echo "${SHA256SUM} /tmp/julia.tar.gz" > /tmp/sha256sum && \
sha256sum -c -s /tmp/sha256sum
EXIT_CODE=$?
if [ $EXIT_CODE -ne 0 ]; then
if [ $EXIT_CODE -ne 0 ]; then
echo "CHECKSUM FAILED" && \
rm /tmp/julia.tar.gz && \
wget "${JULIA_URL}" -O /tmp/julia.tar.gz && \
sha256sum -c -s /tmp/sha256sum
else
echo "CHECKSUM PASSED"
fi

0 comments on commit 80d77e8

Please sign in to comment.