
Commit

Branch names fixing
SGX21 committed Jan 23, 2025
1 parent 0242407 commit 46f6d7d
Showing 2 changed files with 111 additions and 48 deletions.
92 changes: 92 additions & 0 deletions .github/workflows/drive_to_s3.py
@@ -0,0 +1,92 @@
import os
import io
import json
import boto3
from google.oauth2 import service_account
from googleapiclient.discovery import build
from googleapiclient.http import MediaIoBaseDownload

# Configurations
GDRIVE_FOLDER_ID = os.getenv("GDRIVE_FOLDER_ID") # Google Drive Folder ID
GOOGLE_CREDENTIALS_JSON = os.getenv("GOOGLE_CREDENTIALS_JSON") # Path to Service Account JSON file or JSON string
AWS_ACCESS_KEY_ID = os.getenv("AWS_ACCESS_KEY_ID")
AWS_SECRET_ACCESS_KEY = os.getenv("AWS_SECRET_ACCESS_KEY")
S3_BUCKET_NAME = os.getenv("S3_BUCKET_NAME") # Your S3 bucket name
S3_UPLOAD_PATH = os.getenv("S3_UPLOAD_PATH", "") # Destination folder in S3 (empty means root)

# Authenticate with Google Drive
def authenticate_google_drive(credentials_json):
    if os.path.isfile(credentials_json):
        credentials = service_account.Credentials.from_service_account_file(
            credentials_json, scopes=["https://www.googleapis.com/auth/drive.readonly"]
        )
    else:
        # Parse the JSON string safely instead of using eval()
        credentials = service_account.Credentials.from_service_account_info(
            json.loads(credentials_json), scopes=["https://www.googleapis.com/auth/drive.readonly"]
        )
    return build("drive", "v3", credentials=credentials)

# Recursively download files from Google Drive folder
def download_from_drive(service, folder_id, download_path):
    os.makedirs(download_path, exist_ok=True)

    # List all files and folders inside the current folder
    results = service.files().list(
        q=f"'{folder_id}' in parents and trashed=false",
        fields="files(id, name, mimeType)"
    ).execute()

    files = results.get("files", [])

    for file in files:
        file_id = file["id"]
        file_name = file["name"]
        mime_type = file["mimeType"]
        file_path = os.path.join(download_path, file_name)

        if mime_type == "application/vnd.google-apps.folder":
            # If the file is a folder, recurse into it
            print(f"Found folder: {file_name}, downloading contents...")
            download_from_drive(service, file_id, file_path)  # Recursively download this folder
        else:
            # Otherwise, download the file (binary)
            print(f"Downloading file: {file_name}...")
            request = service.files().get_media(fileId=file_id)
            fh = io.FileIO(file_path, "wb")
            downloader = MediaIoBaseDownload(fh, request)
            done = False
            while not done:
                status, done = downloader.next_chunk()
                print(f"Download progress: {int(status.progress() * 100)}%")

    print(f"Download of {folder_id} complete!")

# Upload folder structure to S3, preserving directory structure
def upload_to_s3(local_folder, bucket_name, s3_upload_path, aws_access_key_id, aws_secret_access_key):
    s3_client = boto3.client(
        "s3",
        aws_access_key_id=aws_access_key_id,
        aws_secret_access_key=aws_secret_access_key,
    )

    for root, dirs, files in os.walk(local_folder):
        for file in files:
            local_path = os.path.join(root, file)
            relative_path = os.path.relpath(local_path, local_folder)  # Relative path inside the folder
            s3_key = os.path.join(s3_upload_path, relative_path)  # S3 key to upload to

            print(f"Uploading {local_path} to s3://{bucket_name}/{s3_key}...")
            s3_client.upload_file(local_path, bucket_name, s3_key)

    print("Upload complete!")

# Main script
if __name__ == "__main__":
    # Define download folder
    DOWNLOAD_FOLDER = "downloads"

    # Authenticate and download from Google Drive
    drive_service = authenticate_google_drive(GOOGLE_CREDENTIALS_JSON)
    download_from_drive(drive_service, GDRIVE_FOLDER_ID, DOWNLOAD_FOLDER)

    # Upload to S3
    upload_to_s3(DOWNLOAD_FOLDER, S3_BUCKET_NAME, S3_UPLOAD_PATH, AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY)
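
One limitation worth noting: the files().list call in download_from_drive reads only the first page of results, so a folder with more entries than one page would be synced only partially. A minimal paginated sketch, assuming the same Drive v3 client; the helper name list_folder_children is illustrative and not part of the committed script:

def list_folder_children(service, folder_id):
    # Hypothetical helper: collect every child of a folder by following nextPageToken.
    items = []
    page_token = None
    while True:
        response = service.files().list(
            q=f"'{folder_id}' in parents and trashed=false",
            fields="nextPageToken, files(id, name, mimeType)",
            pageSize=100,
            pageToken=page_token,
        ).execute()
        items.extend(response.get("files", []))
        page_token = response.get("nextPageToken")
        if not page_token:
            return items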
67 changes: 19 additions & 48 deletions .github/workflows/s.yml
@@ -2,66 +2,37 @@ name: Sync Google Drive to S3
 
 on:
   push:
-    branches: drive-to-s3
-  workflow_dispatch: #can be triggered manually
+    branches: feat/drive-to-s3
+
+  workflow_dispatch:
   schedule:
-    - cron: "0 0 * * *" # every day at 00:00 utc
+    - cron: "0 0 * * *" # every day at 00:00 UTC
 
 jobs:
   sync:
     runs-on: ubuntu-latest
 
     steps:
-      - name: Install AWS CLI and GDrive
-        run: |
-          sudo apt-get update
-          sudo apt-get install -y unzip
-          curl -Ls https://github.com/prasmussen/gdrive/releases/download/2.1.0/gdrive-linux-x64 > gdrive
-          chmod +x gdrive
-          sudo mv gdrive /usr/local/bin/
-          aws --version
-          gdrive version
-      - name: Authenticate GDrive
-        env:
-          GDRIVE_TOKEN: ${{ secrets.GDRIVE_TOKEN }}
-        run: |
-          echo "${GDRIVE_TOKEN}" > ~/.gdrive_token
-          gdrive about
-      - name: Download Folder from Google Drive
-        run: |
-          mkdir -p downloads
-          gdrive download --recursive --path downloads/ <YOUR_GOOGLE_DRIVE_FOLDER_ID>
-      - name: Unzip Downloaded Folder
-        run: |
-          for zip_file in downloads/*.zip; do
-            unzip -o "$zip_file" -d downloads/unzipped/
-          done
+      - name: Checkout Code
+        uses: actions/checkout@v3
 
-      - name: Rename Existing Folder in S3 to Backup
-        env:
-          AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
-          AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
-          AWS_REGION: ap-south-1
-        run: |
-          aws s3 mv s3://your-bucket-name/isaac-s3-images/ s3://your-bucket-name/backup/ --recursive
+      - name: Set up Python
+        uses: actions/setup-python@v4
+        with:
+          python-version: "3.9"
 
-      - name: Upload Folder to S3
-        env:
-          AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
-          AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
-          AWS_REGION: ap-south-1
+      - name: Install Dependencies
         run: |
-          aws s3 cp downloads/unzipped/ s3://your-bucket-name/isaac-s3-images/ --recursive --acl public-read
-      - name: Delete Backup Folder from S3
-        if: success()
+          python -m pip install --upgrade pip
+          pip install google-auth google-auth-oauthlib google-auth-httplib2 google-api-python-client boto3
+      - name: Run Drive to S3 Script
         env:
+          GDRIVE_FOLDER_ID: ${{ secrets.GDRIVE_FOLDER_ID }}
+          GOOGLE_CREDENTIALS_JSON: ${{ secrets.GOOGLE_SERVICE_ACCOUNT_CREDENTIALS }}
           AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
           AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
+          AWS_REGION: ${{ secrets.AWS_REGION }}
+          S3_BUCKET_NAME: ${{ secrets.S3_BUCKET_NAME }}
+          S3_UPLOAD_PATH: ${{ secrets.S3_UPLOAD_PATH }}
         run: |
-          aws s3 rm s3://your-bucket-name/backup/ --recursive
+          python ./.github/workflows/drive_to_s3.py
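
For a quick local test outside Actions, the same secrets can be supplied as ordinary environment variables before invoking the script; a minimal sketch, with every value a placeholder rather than a real credential:

import os
import subprocess

# Placeholders only; in CI these come from the repository secrets shown above.
os.environ.update({
    "GDRIVE_FOLDER_ID": "<drive-folder-id>",
    "GOOGLE_CREDENTIALS_JSON": "service_account.json",  # path to a key file, or the JSON itself
    "AWS_ACCESS_KEY_ID": "<aws-access-key-id>",
    "AWS_SECRET_ACCESS_KEY": "<aws-secret-access-key>",
    "S3_BUCKET_NAME": "<bucket-name>",
    "S3_UPLOAD_PATH": "<optional/prefix>",
})
subprocess.run(["python", ".github/workflows/drive_to_s3.py"], check=True)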
