Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

🌱 initial workflow run collect data #194

Merged
256 changes: 210 additions & 46 deletions scripts/_download.js
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,9 @@ import unzipper from "unzipper";
import * as tar from "tar";
import { bold, green, yellow } from "colorette";
import { chmodOwnerPlusX } from "./_util.js";
import { fetchFirstSuccessfulRun, fetchArtifactsForRun } from "./_github.js";

import { globby } from "globby";

const GITHUB_API = "https://api.github.com";

Expand Down Expand Up @@ -48,6 +51,10 @@ export async function getGitHubReleaseTagSha(org, repo, releaseTag) {
},
});

if (!response.ok) {
throw new Error(`Failed to fetch release tag sha: ${response.statusText}`);
}

return await response.text();
}

Expand All @@ -68,12 +75,8 @@ async function streamResponseToFile(targetFile, fetchResponse) {
const pipeToHash = pipeline(reader, hash);

// run them both, reject if either rejects, resolve with the hash
try {
await Promise.all([pipeToFile, pipeToHash]);
return Promise.resolve(hash.digest("hex"));
} catch (e) {
return Promise.reject(e);
}
await Promise.all([pipeToFile, pipeToHash]);
return hash.digest("hex");
}

export async function downloadAndExtractGitHubAsset(
Expand Down Expand Up @@ -101,11 +104,11 @@ export async function downloadAndExtractGitHubAsset(
await zipFile.extract({ path: assetDir });

const extractedFiles = await fs.readdir(assetDir);
extractedFiles.forEach(async (file) => {
for (const file of extractedFiles) {
if (chmod && extname(file) !== ".zip") {
chmodOwnerPlusX(join(assetDir, file));
}
});
}

console.log(`Extracted: ${gitHubReleaseAsset.name}`);
}
Expand All @@ -118,47 +121,43 @@ export async function downloadGitHubRelease({
releaseTag,
assets,
}) {
try {
const releaseData = await getGitHubReleaseMetadata(org, repo, releaseTag);
const commitId = await getGitHubReleaseTagSha(org, repo, releaseTag);
const releaseAssets = releaseData.assets;

const metadata = {
org,
repo,
releaseTag,
commitId,
collectedAt: new Date().toISOString(),
assets: [],
};

for (const { name, platform, arch, chmod } of assets) {
const releaseAsset = releaseAssets.find((a) => a.name.toLowerCase() === name.toLowerCase());
if (releaseAsset) {
try {
console.group(yellow(releaseAsset.name));
await downloadAndExtractGitHubAsset(targetDirectory, releaseAsset, platform, arch, chmod);
} finally {
console.groupEnd();
}
metadata.assets.push({
name: releaseAsset.name,
platform,
arch,
chmod: !!chmod,
updatedAt: releaseAsset.updated_at,
});
} else {
console.warn(`Asset [${name}] was not found in GitHub release ${releaseData.html_url}`);
const releaseData = await getGitHubReleaseMetadata(org, repo, releaseTag);
const commitId = await getGitHubReleaseTagSha(org, repo, releaseTag);
const releaseAssets = releaseData.assets;

const metadata = {
org,
repo,
releaseTag,
commitId,
collectedAt: new Date().toISOString(),
assets: [],
};

for (const { name, platform, arch, chmod } of assets) {
const releaseAsset = releaseAssets.find((a) => a.name.toLowerCase() === name.toLowerCase());
if (releaseAsset) {
try {
console.group(yellow(releaseAsset.name));
await downloadAndExtractGitHubAsset(targetDirectory, releaseAsset, platform, arch, chmod);
} finally {
console.groupEnd();
}
metadata.assets.push({
name: releaseAsset.name,
platform,
arch,
chmod: !!chmod,
updatedAt: releaseAsset.updated_at,
});
} else {
console.warn(`Asset [${name}] was not found in GitHub release ${releaseData.html_url}`);
}

await fs.writeJson(metaFile, metadata, { spaces: 2 });
console.log(`Metadata written to ${metaFile}`);
console.log(`All assets downloaded to: ${targetDirectory}`);
} catch (error) {
console.error("Error downloading the release:", error.message);
}

await fs.writeJson(metaFile, metadata, { spaces: 2 });
console.log(`Metadata written to ${metaFile}`);
console.log(`All assets downloaded to: ${targetDirectory}`);
}

export async function downloadAndExtractTarGz({ targetDirectory, url, sha256 }) {
Expand Down Expand Up @@ -203,3 +202,168 @@ export async function downloadAndExtractTarGz({ targetDirectory, url, sha256 })
console.groupEnd();
}
}

/**
* Download a workflow artifact from GitHub
*
* @param {string} url - Download URL for the artifact
* @param {string} name - Name of the artifact file
* @param {string} outputDir - Directory to save the downloaded artifact
* @returns {Promise<string>} - Path to the downloaded artifact file
*/
export async function downloadArtifact(url, name, outputDir, token) {
const assetDir = join(outputDir);
const assetFileName = join(assetDir, `${name}`);
await fs.ensureDir(assetDir);

console.log(`Downloading asset: ${name}`);
const response = await fetch(url, {
headers: {
Accept: "application/vnd.github.v3+json",
Authorization: `Bearer ${token}`,
},
});

if (!response.ok) {
throw new Error(`Failed to download ${name}: ${response.statusText}`);
}

const sha256 = await streamResponseToFile(assetFileName, response);
console.log(`Verifying download of: ${name}`);
console.log(`Asset sha256: ${green(sha256)}`);

console.log(`Downloaded ${name} to ${assetFileName}`);
return assetFileName;
}

/**
* Extract an artifact ZIP file.
*
* @param {string} filePath - Path to the ZIP file
* @param {string} tempDir - Temporary directory for extraction
* @param {string} finalDir - Final directory for extracted files
*/
export async function extractArtifact(filePath, tempDir, finalDir) {
console.log(`Checking if ${filePath} is a valid ZIP archive.`);

// First extraction to temporary directory
console.log(`Extracting ${filePath} to temporary directory: ${tempDir}`);
const unzipArtifact = await unzipper.Open.file(filePath);
await unzipArtifact.extract({ path: tempDir });

// Find and extract any nested zip files in the temp directory
const zipFiles = await globby("*.zip", { cwd: tempDir });
for (const file of zipFiles) {
const nestedZipPath = join(tempDir, file);
console.log(`Extracting nested zip: ${nestedZipPath} to ${finalDir}`);

const unzipNestedArtifact = await unzipper.Open.file(nestedZipPath);
await unzipNestedArtifact.extract({ path: finalDir });

console.log(`Deleting nested zip: ${nestedZipPath}`);
await fs.unlink(nestedZipPath);
}

// Cleanup temporary directory
console.log(`Cleaning up temporary directory: ${tempDir}`);
await fs.rm(tempDir, { recursive: true, force: true });

// Cleanup the original zip file
console.log(`Deleting original zip file: ${filePath}`);
await fs.unlink(filePath);

console.log(`Extraction complete: ${filePath} -> ${finalDir}`);
}

/**
* Download and process workflow artifacts.
*
* @param {string} targetDirectory - Directory to store downloaded artifacts
* @param {string} metaFile - Path to the metadata file
* @param {string} org - GH org
* @param {string} repo - GH repo
* @param {string} workflow - Name of the workflow file
* @param {Array} assets - List of assets to download
*/
export async function downloadWorkflowArtifacts({
targetDirectory,
metaFile,
org,
repo,
workflow,
assets,
}) {
const GITHUB_TOKEN = process.env.GITHUB_TOKEN;

if (!GITHUB_TOKEN) {
const errorMessage = "GITHUB_TOKEN environment variable is not set.";
console.error(errorMessage);
throw new Error(errorMessage);
}

console.log("Fetching first successful workflow run...");
const { workflowRunId, headSha } = await fetchFirstSuccessfulRun(org, repo, workflow);

if (!workflowRunId) {
throw new Error("No successful workflow runs found.");
}

console.log(`Workflow Run ID: ${workflowRunId}`);
const artifactUrls = await fetchArtifactsForRun(org, repo, workflowRunId);

if (!artifactUrls || artifactUrls.length === 0) {
throw new Error("No artifacts found for the workflow run.");
}

const metadata = {
org,
repo,
workflow,
workflowRunId,
commitId: headSha,
collectedAt: new Date().toISOString(),
assets: [],
};

for (const asset of assets) {
const { name } = asset;
const artifact = artifactUrls.find((a) => a.name === name);

if (!artifact) {
console.warn(`Asset [${name}] was not found in the workflow artifacts.`);
continue;
}

try {
console.log(`Processing artifact: ${name}`);

const downloadedFilePath = await downloadArtifact(
artifact.url,
name,
targetDirectory,
GITHUB_TOKEN,
);

// Extract the artifact
const tempDir = join(targetDirectory, "temp");
const extractionDir = join(targetDirectory, name.replace(/\.zip$/, ""));
await extractArtifact(downloadedFilePath, tempDir, extractionDir);
sjd78 marked this conversation as resolved.
Show resolved Hide resolved

for (const file of await globby(["*", "!*.zip"], { cwd: extractionDir, absolute: true })) {
chmodOwnerPlusX(file);
}

metadata.assets.push({
name,
extractionDir,
downloadedAt: new Date().toISOString(),
});
} catch (error) {
console.error(`Error processing artifact [${name}]:`, error.message);
throw new Error(`Failed to process artifact ${name}: ${error.message}`);
}
}

await fs.writeJson(metaFile, metadata, { spaces: 2 });
console.log(`Metadata written to ${metaFile}`);
}
Loading
Loading