From 732ba915aaf82dac96362f1bf5fdcc15ee87e39b Mon Sep 17 00:00:00 2001
From: Glen
Date: Sun, 8 Dec 2024 18:32:06 +0000
Subject: [PATCH] new_conf

---
 .github/bench.py                              |  6 +-
 .../{build_and_test.yml => bench_job.yml}     | 55 +++++++++++--------
 .github/workflows/benchmarks.yml              | 16 ++++++
 3 files changed, 49 insertions(+), 28 deletions(-)
 rename .github/workflows/{build_and_test.yml => bench_job.yml} (70%)
 create mode 100644 .github/workflows/benchmarks.yml

diff --git a/.github/bench.py b/.github/bench.py
index 0dac02534..e06ecdff2 100644
--- a/.github/bench.py
+++ b/.github/bench.py
@@ -21,9 +21,7 @@ async def measure_performance(api_endpoint: str, prompt: str) -> Dict[str, Any]:
     """
     model = os.environ.get('model')
     results: Dict[str, Any] = {'model': model, 'run_id': os.environ.get('GITHUB_RUN_ID')}
-    results['configuration'] = {
-        'M4': 2  # TODO get this through env vars from the matrix def
-    }
+    results['configuration'] = json.loads(os.environ.get('HARDWARE_CONFIG', '{}'))
 
     # Get prompt length in tokens
     async with aiohttp.ClientSession() as session:
@@ -37,7 +35,7 @@ async def measure_performance(api_endpoint: str, prompt: str) -> Dict[str, Any]:
                 json=request_payload
             ) as response:
                 token_data = await response.json()
-                prompt_tokens = token_data.get('length', 0)
+                prompt_tokens = token_data.get('num_tokens', 0)
                 print(f"Prompt length: {prompt_tokens} tokens", flush=True)
     except Exception as e:
         print(f"Failed to get prompt length: {e}", flush=True)
diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/bench_job.yml
similarity index 70%
rename from .github/workflows/build_and_test.yml
rename to .github/workflows/bench_job.yml
index c733731b1..8cb4bd7aa 100644
--- a/.github/workflows/build_and_test.yml
+++ b/.github/workflows/bench_job.yml
@@ -1,26 +1,38 @@
-name: Build and Test
+# This is the reusable workflow file
+name: Distributed Job Runner
 
 on:
-  push:
-    branches: [ '*' ]
-    tags: [ '*' ]
-  pull_request:
-    branches: [ '*' ]
-
-env:
-  PYTHON_VERSION: "3.12"
-  TOKENIZERS_PARALLELISM: "false"
-  PYTHONPATH: "."
-
+  workflow_call:
+    inputs:
+      config:
+        required: true
+        type: string
+      model:
+        required: true
+        type: string
 jobs:
-  check_local_runner:
+  generate-matrix:
+    runs-on: ubuntu-latest
+    outputs:
+      matrix: ${{ steps.set-matrix.outputs.matrix }}
+    steps:
+      - id: set-matrix
+        env:
+          CONFIG: ${{ inputs.config }}
+        run: |
+          MATRIX=$(echo "$CONFIG" | jq -c '{cpu: [to_entries | .[] | .key as $k | range(.value) | $k]}')
+          echo "matrix=$MATRIX" >> $GITHUB_OUTPUT
+
+  run-distributed-job:
+    needs: generate-matrix
     strategy:
-      matrix:
-        cpu: ['M4', 'M4']
+      matrix: ${{ fromJson(needs.generate-matrix.outputs.matrix) }}
     runs-on: ['self-hosted', 'macOS', '${{ matrix.cpu }}']
+    env:
+      HARDWARE_CONFIG: ${{ inputs.config }}
+      model: ${{ inputs.model }}
     steps:
     - uses: actions/checkout@v4
-
     - name: Install dependencies
      run: |
         # First, find where python3.12 is installed
@@ -45,18 +57,16 @@ jobs:
         pip install --upgrade pip
         pip install .
         pip install boto3==1.35.76
-
     - name: Run exo
       env:
         aws_access_key_id: ${{ secrets.S3_EXO_BENCHMARKS_AWS_ACCESS_KEY_ID }}
         aws_secret_key: ${{ secrets.S3_EXO_BENCHMARKS_AWS_SECRET_ACCESS_KEY }}
-        model: llama-3.2-1b
       run: |
-        ALL_NODE_IDS=$(for i in $(seq ${{ strategy.job-total }} -1 0); do echo -n "${GITHUB_JOB}_${i},"; done | sed 's/,$//')
-        MY_NODE_ID="${GITHUB_JOB}_${{ strategy.job-index }}"
+        UNIQUE_JOB_ID="${GITHUB_JOB}_${GITHUB_RUN_ID}"
+        ALL_NODE_IDS=$(for i in $(seq ${{ strategy.job-total }} -1 0); do echo -n "${UNIQUE_JOB_ID}_${i},"; done | sed 's/,$//')
+        MY_NODE_ID="${UNIQUE_JOB_ID}_${{ strategy.job-index }}"
         source env/bin/activate
         export PATH="/usr/local/bin:/opt/homebrew/bin:$PATH"
-        echo "${ALL_NODE_IDS}"
         exo --node-id="${MY_NODE_ID}" --node-id-filter="${ALL_NODE_IDS}" --chatgpt-api-port 52415 --disable-tui > output1.log 2>&1 &
         PID1=$!
         tail -f output1.log &
@@ -83,6 +93,3 @@ jobs:
             sleep 5
           done
         fi
-    - name: Test
-      run: |
-        echo "GITHUB_JOB: ${GITHUB_JOB}, GITHUB_RUN_ID: ${GITHUB_RUN_ID}, GITHUB_RUN_NUMBER: ${GITHUB_RUN_NUMBER}, GITHUB_WORKFLOW: ${GITHUB_WORKFLOW}"
\ No newline at end of file
diff --git a/.github/workflows/benchmarks.yml b/.github/workflows/benchmarks.yml
new file mode 100644
index 000000000..c5146b208
--- /dev/null
+++ b/.github/workflows/benchmarks.yml
@@ -0,0 +1,16 @@
+name: Build and Test
+
+on:
+  push:
+    branches: [ '*' ]
+    tags: [ '*' ]
+  pull_request:
+    branches: [ '*' ]
+
+jobs:
+  test-m4-cluster:
+    uses: ./.github/workflows/bench_job.yml
+    with:
+      config: '{"M4PRO_GPU16_24GB": 2}'
+      model: 'llama-3.2-1b'
+    secrets: inherit
\ No newline at end of file
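
For reference (not part of the applied diff): the generate-matrix step expands the
config JSON into one runner-label entry per requested node, which run-distributed-job
then fans out over. With the config that benchmarks.yml passes, the jq filter behaves
like this:

    $ CONFIG='{"M4PRO_GPU16_24GB": 2}'
    $ echo "$CONFIG" | jq -c '{cpu: [to_entries | .[] | .key as $k | range(.value) | $k]}'
    {"cpu":["M4PRO_GPU16_24GB","M4PRO_GPU16_24GB"]}

Each array element becomes one matrix job, so this config schedules two jobs on
runners labeled M4PRO_GPU16_24GB; strategy.job-index and strategy.job-total then give
each node its slot in the ALL_NODE_IDS list passed to exo's --node-id-filter.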