Fix polling algorithm: dispatch the benchmark workflow once, then poll for the matching run

succinctlabs · Sep 20, 2024 · 32661e1 · 32661e1
1 parent 8de4adc
commit 32661e1
Showing 1 changed file with 71 additions and 67 deletions.
diff --git a/.github/workflows/adhoc-matrix.yml b/.github/workflows/adhoc-matrix.yml
@@ -83,90 +83,94 @@ jobs:
           github-token: ${{ secrets.GH_PAT }}
           script: |
             const runnerName = '${{ steps.start-ec2-runner.outputs.label }}';
-            const maxAttempts = 5;
-            const initialDelay = 30000; // 30 seconds
+            const maxAttempts = 30; // Increased for polling
+            const pollInterval = 10000; // 10 seconds
             let triggeredRunId = null;
             
+            console.log('Triggering benchmark workflow');
+            
+            try {
+              await github.rest.actions.createWorkflowDispatch({
+                owner: context.repo.owner,
+                repo: context.repo.repo,
+                workflow_id: 'run-on-runner.yml',
+                ref: context.ref,
+                inputs: {
+                  runner_name: runnerName,
+                  instance_type: '${{ matrix.instance_type }}',
+                  enable_gpu: '${{ matrix.enable_gpu }}',
+                  provers: '${{ inputs.provers }}',
+                  programs: '${{ inputs.programs }}',
+                  filename: '${{ inputs.filename }}_${{ matrix.instance_type }}',
+                  trials: '${{ inputs.trials }}',
+                  sp1_ref: '${{ inputs.sp1_ref }}',
+                  additional_params: '${{ inputs.additional_params }}'
+                }
+              });
+              console.log('Benchmark workflow triggered successfully');
+            } catch (error) {
+              core.setFailed(`Failed to trigger workflow: ${error.message}`);
+              return;
+            }
+            
+            console.log('Polling for the triggered run');
             for (let attempt = 1; attempt <= maxAttempts; attempt++) {
-              console.log(`Attempt ${attempt} to trigger benchmark workflow`);
-              
-              await new Promise(resolve => setTimeout(resolve, initialDelay * attempt));
+              await new Promise(resolve => setTimeout(resolve, pollInterval));
               
               try {
-                const result = await github.rest.actions.createWorkflowDispatch({
+                const runs = await github.rest.actions.listWorkflowRuns({
                   owner: context.repo.owner,
                   repo: context.repo.repo,
                   workflow_id: 'run-on-runner.yml',
-                  ref: context.ref,
-                  inputs: {
-                    runner_name: runnerName,
-                    instance_type: '${{ matrix.instance_type }}',
-                    enable_gpu: '${{ matrix.enable_gpu }}',
-                    provers: '${{ inputs.provers }}',
-                    programs: '${{ inputs.programs }}',
-                    filename: '${{ inputs.filename }}_${{ matrix.instance_type }}',
-                    trials: '${{ inputs.trials }}',
-                    sp1_ref: '${{ inputs.sp1_ref }}',
-                    additional_params: '${{ inputs.additional_params }}'
-                  }
+                  status: 'in_progress'
                 });
-                console.log('Benchmark workflow triggered successfully');
                 
-                // Wait for the run to appear in the list
-                for (let i = 0; i < 10; i++) {
-                  await new Promise(resolve => setTimeout(resolve, 5000));
-                  const runs = await github.rest.actions.listWorkflowRuns({
-                    owner: context.repo.owner,
-                    repo: context.repo.repo,
-                    workflow_id: 'run-on-runner.yml',
-                    status: 'in_progress'
-                  });
-                  const recentRun = runs.data.workflow_runs.find(run => 
-                    new Date(run.created_at).getTime() > Date.now() - 60000
-                  );
-                  if (recentRun) {
-                    // Fetch the run details to check inputs
-                    const runDetails = await github.rest.actions.getWorkflowRun({
-                      owner: context.repo.owner,
-                      repo: context.repo.repo,
-                      run_id: recentRun.id
-                    });
-                    
-                    // Check if the inputs match what we expect
-                    const inputs = runDetails.data.inputs;
-                    if (
-                      inputs.runner_name === runnerName &&
-                      inputs.instance_type === '${{ matrix.instance_type }}' &&
-                      inputs.enable_gpu === '${{ matrix.enable_gpu }}' &&
-                      inputs.provers === '${{ inputs.provers }}' &&
-                      inputs.programs === '${{ inputs.programs }}' &&
-                      inputs.filename === '${{ inputs.filename }}_${{ matrix.instance_type }}' &&
-                      inputs.trials === '${{ inputs.trials }}' &&
-                      inputs.sp1_ref === '${{ inputs.sp1_ref }}' &&
-                      inputs.additional_params === '${{ inputs.additional_params }}'
-                    ) {
-                      triggeredRunId = recentRun.id;
-                      break;
-                    } else {
-                      console.log('Found a run, but inputs do not match. Continuing search...');
+                for (const run of runs.data.workflow_runs) {
+                  if (new Date(run.created_at).getTime() > Date.now() - 300000) { // Within last 5 minutes
+                    try {
+                      const runDetails = await github.rest.actions.getWorkflowRun({
+                        owner: context.repo.owner,
+                        repo: context.repo.repo,
+                        run_id: run.id
+                      });
+                      
+                      const inputs = runDetails.data.inputs;
+                      if (
+                        inputs.runner_name === runnerName &&
+                        inputs.instance_type === '${{ matrix.instance_type }}' &&
+                        inputs.enable_gpu === '${{ matrix.enable_gpu }}' &&
+                        inputs.provers === '${{ inputs.provers }}' &&
+                        inputs.programs === '${{ inputs.programs }}' &&
+                        inputs.filename === '${{ inputs.filename }}_${{ matrix.instance_type }}' &&
+                        inputs.trials === '${{ inputs.trials }}' &&
+                        inputs.sp1_ref === '${{ inputs.sp1_ref }}' &&
+                        inputs.additional_params === '${{ inputs.additional_params }}'
+                      ) {
+                        triggeredRunId = run.id;
+                        console.log(`Found matching run. Triggered run ID: ${triggeredRunId}`);
+                        break;
+                      }
+                    } catch (error) {
+                      console.log(`Error checking inputs for run ${run.id}: ${error.message}`);
+                      // Continue to the next run
+                      continue;
                     }
                   }
                 }
                 
-                if (triggeredRunId) {
-                  console.log(`Triggered run ID: ${triggeredRunId}`);
-                  break;
-                } else {
-                  throw new Error('Failed to find the triggered workflow run with matching inputs');
-                }
+                if (triggeredRunId) break;
+                
+                console.log(`Attempt ${attempt}: Matching run not found yet. Continuing to poll...`);
               } catch (error) {
-                console.log(`Failed to trigger or find workflow: ${error.message}`);
-                if (attempt === maxAttempts) {
-                  core.setFailed('Failed to trigger benchmark workflow after multiple attempts');
-                }
+                console.log(`Error while polling: ${error.message}`);
               }
             }
-            // Use the new method to set output
+            
+            if (!triggeredRunId) {
+              core.setFailed('Failed to find the triggered workflow run with matching inputs after maximum attempts');
+              return;
+            }
+            
             core.exportVariable('TRIGGERED_RUN_ID', triggeredRunId);
             console.log(`Triggered run ID: ${triggeredRunId}`);