diff --git a/.github/workflows/drive_to_s3.py b/.github/workflows/drive_to_s3.py
new file mode 100644
index 00000000..6afb9954
--- /dev/null
+++ b/.github/workflows/drive_to_s3.py
@@ -0,0 +1,86 @@
+import os
+import boto3
+from google.oauth2 import service_account
+from googleapiclient.discovery import build
+from googleapiclient.http import MediaIoBaseDownload
+import io
+import json
+
+# Configuration
+GDRIVE_FOLDER_ID = os.getenv("GDRIVE_FOLDER_ID")  # Google Drive folder ID
+GOOGLE_CREDENTIALS_JSON = os.getenv("GOOGLE_CREDENTIALS_JSON")  # Path to a service account JSON file, or the JSON string itself
+AWS_ACCESS_KEY_ID = os.getenv("AWS_ACCESS_KEY_ID")
+AWS_SECRET_ACCESS_KEY = os.getenv("AWS_SECRET_ACCESS_KEY")
+S3_BUCKET_NAME = os.getenv("S3_BUCKET_NAME")  # Target S3 bucket name
+S3_UPLOAD_PATH = os.getenv("S3_UPLOAD_PATH", "isaac-s3-images/")  # Destination prefix in S3
+
+# Authenticate with Google Drive
+def authenticate_google_drive(credentials_json):
+    if os.path.isfile(credentials_json):
+        credentials = service_account.Credentials.from_service_account_file(
+            credentials_json, scopes=["https://www.googleapis.com/auth/drive.readonly"]
+        )
+    else:
+        # The secret holds the JSON content itself; parse it with json.loads rather than eval
+        credentials = service_account.Credentials.from_service_account_info(
+            json.loads(credentials_json), scopes=["https://www.googleapis.com/auth/drive.readonly"]
+        )
+    return build("drive", "v3", credentials=credentials)
+
+# Download files from Google Drive
+def download_from_drive(service, folder_id, download_path):
+    os.makedirs(download_path, exist_ok=True)
+    # List every file in the folder, following pagination so large folders are not truncated
+    files = []
+    page_token = None
+    while True:
+        results = service.files().list(
+            q=f"'{folder_id}' in parents and trashed=false",
+            fields="nextPageToken, files(id, name)",
+            pageToken=page_token,
+        ).execute()
+        files.extend(results.get("files", []))
+        page_token = results.get("nextPageToken")
+        if page_token is None:
+            break
+    for file in files:
+        file_id = file["id"]
+        file_name = file["name"]
+        file_path = os.path.join(download_path, file_name)
+        print(f"Downloading {file_name}...")
+        request = service.files().get_media(fileId=file_id)
+        with io.FileIO(file_path, "wb") as fh:
+            downloader = MediaIoBaseDownload(fh, request)
+            done = False
+            while not done:
+                status, done = downloader.next_chunk()
+                print(f"Download progress: {int(status.progress() * 100)}%")
+    print("Download complete!")
+
+# Upload files to S3
+def upload_to_s3(local_folder, bucket_name, s3_upload_path, aws_access_key_id, aws_secret_access_key):
+    s3_client = boto3.client(
+        "s3",
+        aws_access_key_id=aws_access_key_id,
+        aws_secret_access_key=aws_secret_access_key,
+    )
+    # Walk the download folder and mirror its structure under the S3 prefix
+    for root, _, files in os.walk(local_folder):
+        for file in files:
+            local_path = os.path.join(root, file)
+            s3_key = os.path.join(s3_upload_path, os.path.relpath(local_path, local_folder))
+            print(f"Uploading {local_path} to s3://{bucket_name}/{s3_key}...")
+            s3_client.upload_file(local_path, bucket_name, s3_key)
+    print("Upload complete!")
+
+# Main script
+if __name__ == "__main__":
+    # Local staging folder for the Drive download
+    DOWNLOAD_FOLDER = "downloads"
+
+    # Authenticate and download from Google Drive
+    drive_service = authenticate_google_drive(GOOGLE_CREDENTIALS_JSON)
+    download_from_drive(drive_service, GDRIVE_FOLDER_ID, DOWNLOAD_FOLDER)
+
+    # Upload to S3
+    upload_to_s3(DOWNLOAD_FOLDER, S3_BUCKET_NAME, S3_UPLOAD_PATH, AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY)
\ No newline at end of file
diff --git a/.github/workflows/s.yml b/.github/workflows/s.yml
index c720af8c..43bdd6be 100644
--- a/.github/workflows/s.yml
+++ b/.github/workflows/s.yml
@@ -2,66 +2,37 @@ name: Sync Google Drive to S3
 on:
   push:
-    branches: drive-to-s3
-
-  workflow_dispatch: #can be triggered manually
+    branches: feat/drive-to-s3
+
+  workflow_dispatch:
 
   schedule:
-    - cron: "0 0 * * *" # every day at 00:00 utc
+    - cron: "0 0 * * *" # every day at 00:00 UTC
 
 
 jobs:
   sync:
     runs-on: ubuntu-latest
     steps:
-      - name: Install AWS CLI and GDrive
-        run: |
-          sudo apt-get update
-          sudo apt-get install -y unzip
-          curl -Ls https://github.com/prasmussen/gdrive/releases/download/2.1.0/gdrive-linux-x64 > gdrive
-          chmod +x gdrive
-          sudo mv gdrive /usr/local/bin/
-          aws --version
-          gdrive version
-
-      - name: Authenticate GDrive
-        env:
-          GDRIVE_TOKEN: ${{ secrets.GDRIVE_TOKEN }}
-        run: |
-          echo "${GDRIVE_TOKEN}" > ~/.gdrive_token
-          gdrive about
-
-      - name: Download Folder from Google Drive
-        run: |
-          mkdir -p downloads
-          gdrive download --recursive --path downloads/
-
-      - name: Unzip Downloaded Folder
-        run: |
-          for zip_file in downloads/*.zip; do
-            unzip -o "$zip_file" -d downloads/unzipped/
-          done
+      - name: Checkout Code
+        uses: actions/checkout@v3
 
-      - name: Rename Existing Folder in S3 to Backup
-        env:
-          AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
-          AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
-          AWS_REGION: ap-south-1
-        run: |
-          aws s3 mv s3://your-bucket-name/isaac-s3-images/ s3://your-bucket-name/backup/ --recursive
+      - name: Set up Python
+        uses: actions/setup-python@v4
+        with:
+          python-version: "3.9"
 
-      - name: Upload Folder to S3
-        env:
-          AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
-          AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
-          AWS_REGION: ap-south-1
+      - name: Install Dependencies
         run: |
-          aws s3 cp downloads/unzipped/ s3://your-bucket-name/isaac-s3-images/ --recursive --acl public-read
+          python -m pip install --upgrade pip
+          pip install google-auth google-auth-oauthlib google-auth-httplib2 google-api-python-client boto3
 
-      - name: Delete Backup Folder from S3
-        if: success()
+      - name: Run Drive to S3 Script
         env:
+          GDRIVE_FOLDER_ID: ${{ secrets.GDRIVE_FOLDER_ID }}
+          GOOGLE_CREDENTIALS_JSON: ${{ secrets.GOOGLE_SERVICE_ACCOUNT_CREDENTIALS }}
           AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
           AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
-          AWS_REGION: ${{ secrets.AWS_REGION }}
+          S3_BUCKET_NAME: ${{ secrets.S3_BUCKET_NAME }}
+          S3_UPLOAD_PATH: ${{ secrets.S3_UPLOAD_PATH }}
         run: |
-          aws s3 rm s3://your-bucket-name/backup/ --recursive
\ No newline at end of file
+          python .github/workflows/drive_to_s3.py
\ No newline at end of file