-
Notifications
You must be signed in to change notification settings - Fork 7
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
2 changed files
with
111 additions
and
48 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,92 @@ | ||
import os | ||
import io | ||
import boto3 | ||
from google.oauth2 import service_account | ||
from googleapiclient.discovery import build | ||
from googleapiclient.http import MediaIoBaseDownload | ||
|
||
# Configuration: every setting below is read from the process environment.
GDRIVE_FOLDER_ID = os.environ.get("GDRIVE_FOLDER_ID")  # ID of the Google Drive folder to copy
GOOGLE_CREDENTIALS_JSON = os.environ.get("GOOGLE_CREDENTIALS_JSON")  # service-account key: file path or inline JSON string
AWS_ACCESS_KEY_ID = os.environ.get("AWS_ACCESS_KEY_ID")
AWS_SECRET_ACCESS_KEY = os.environ.get("AWS_SECRET_ACCESS_KEY")
S3_BUCKET_NAME = os.environ.get("S3_BUCKET_NAME")  # name of the destination S3 bucket
S3_UPLOAD_PATH = os.environ.get("S3_UPLOAD_PATH", "")  # destination prefix in the bucket; "" means the bucket root
|
||
# Authenticate with Google Drive
def authenticate_google_drive(credentials_json):
    """Build a read-only Google Drive v3 client from service-account credentials.

    Args:
        credentials_json: Either the path to a service-account key file, or
            the key material itself as a JSON string.

    Returns:
        The object returned by ``build("drive", "v3", credentials=...)``.

    Raises:
        ValueError: If ``credentials_json`` is empty, or is neither an
            existing file nor a string that parses to a JSON object.
    """
    import ast
    import json

    if not credentials_json:
        # Without this guard os.path.isfile(None) fails with an opaque TypeError.
        raise ValueError(
            "No Google credentials supplied: expected a key file path or a JSON string"
        )

    scopes = ["https://www.googleapis.com/auth/drive.readonly"]

    if os.path.isfile(credentials_json):
        credentials = service_account.Credentials.from_service_account_file(
            credentials_json, scopes=scopes
        )
    else:
        # SECURITY: this value comes from the environment. It used to be passed
        # to eval(), which executes arbitrary code and also rejects valid JSON
        # containing true/false/null. Parse it as data instead.
        try:
            info = json.loads(credentials_json)
        except ValueError:
            # Backward compatibility: eval() also accepted Python dict literals
            # (e.g. single-quoted keys); literal_eval handles those safely.
            try:
                info = ast.literal_eval(credentials_json)
            except (ValueError, SyntaxError, TypeError) as err:
                raise ValueError(
                    "Google credentials are neither an existing file nor valid JSON"
                ) from err
        if not isinstance(info, dict):
            raise ValueError("Google credentials JSON must describe an object")
        credentials = service_account.Credentials.from_service_account_info(
            info, scopes=scopes
        )

    return build("drive", "v3", credentials=credentials)
|
||
_DRIVE_FOLDER_MIME = "application/vnd.google-apps.folder"
_DRIVE_NATIVE_PREFIX = "application/vnd.google-apps."


def _iter_folder_children(service, folder_id):
    """Yield every non-trashed child of *folder_id*, following all result pages."""
    page_token = None
    while True:
        response = service.files().list(
            q=f"'{folder_id}' in parents and trashed=false",
            fields="nextPageToken, files(id, name, mimeType)",
            pageToken=page_token,
        ).execute()
        yield from response.get("files", [])
        page_token = response.get("nextPageToken")
        if not page_token:
            break


def _download_file(service, file_id, file_path):
    """Stream one binary Drive file to *file_path*, printing chunk progress."""
    request = service.files().get_media(fileId=file_id)
    # The context manager guarantees the handle is closed even if a chunk fails.
    with open(file_path, "wb") as fh:
        downloader = MediaIoBaseDownload(fh, request)
        done = False
        while not done:
            status, done = downloader.next_chunk()
            print(f"Download progress: {int(status.progress() * 100)}%")


# Recursively download files from Google Drive folder
def download_from_drive(service, folder_id, download_path):
    """Mirror a Google Drive folder tree into a local directory.

    Args:
        service: Drive v3 client exposing ``files().list`` and
            ``files().get_media``.
        folder_id: ID of the Drive folder whose contents are downloaded.
        download_path: Local directory to write into; created if missing.

    Sub-folders are recreated locally and descended into recursively.
    Google-native items other than folders (Docs, Sheets, shortcuts, ...)
    have no binary content for ``get_media`` to return, so they are skipped
    with a message instead of aborting the whole run.
    """
    os.makedirs(download_path, exist_ok=True)

    for item in _iter_folder_children(service, folder_id):
        file_id = item["id"]
        file_name = item["name"]
        mime_type = item["mimeType"]
        file_path = os.path.join(download_path, file_name)

        if mime_type == _DRIVE_FOLDER_MIME:
            # If the item is a folder, recurse into it
            print(f"Found folder: {file_name}, downloading contents...")
            download_from_drive(service, file_id, file_path)
        elif mime_type.startswith(_DRIVE_NATIVE_PREFIX):
            print(f"Skipping Google-native file (requires export, not download): {file_name}")
        else:
            # Otherwise, download the file (binary)
            print(f"Downloading file: {file_name}...")
            _download_file(service, file_id, file_path)

    print(f"Download of {folder_id} complete!")
|
||
# Upload folder structure to S3, preserving directory structure
def upload_to_s3(local_folder, bucket_name, s3_upload_path, aws_access_key_id, aws_secret_access_key):
    """Upload every file under *local_folder* to an S3 bucket.

    Args:
        local_folder: Root of the local directory tree to upload.
        bucket_name: Destination S3 bucket.
        s3_upload_path: Key prefix inside the bucket; empty or ``None``
            uploads relative to the bucket root.
        aws_access_key_id: Access key id passed to ``boto3.client``.
        aws_secret_access_key: Secret access key passed to ``boto3.client``.

    Each object key is the prefix joined with the file's path relative to
    *local_folder*, so the local directory layout is preserved in the bucket.
    """
    import posixpath

    s3_client = boto3.client(
        "s3",
        aws_access_key_id=aws_access_key_id,
        aws_secret_access_key=aws_secret_access_key,
    )

    prefix = s3_upload_path or ""

    for root, _dirs, files in os.walk(local_folder):
        for file_name in files:
            local_path = os.path.join(root, file_name)
            relative_path = os.path.relpath(local_path, local_folder)
            # S3 keys always use "/" as the delimiter. Building the key with
            # os.path.join would embed backslashes on Windows, so split on the
            # local separator and re-join with POSIX rules.
            s3_key = posixpath.join(prefix, *relative_path.split(os.sep))

            print(f"Uploading {local_path} to s3://{bucket_name}/{s3_key}...")
            s3_client.upload_file(local_path, bucket_name, s3_key)

    print("Upload complete!")
|
||
# Main script
if __name__ == "__main__":
    # Define download folder
    DOWNLOAD_FOLDER = "downloads"

    # Fail fast on missing configuration so a misconfigured run stops before
    # any Drive download starts. The AWS keys are not required here because
    # they are passed straight through to boto3.client, unset or not.
    required_settings = {
        "GDRIVE_FOLDER_ID": GDRIVE_FOLDER_ID,
        "GOOGLE_CREDENTIALS_JSON": GOOGLE_CREDENTIALS_JSON,
        "S3_BUCKET_NAME": S3_BUCKET_NAME,
    }
    missing = [name for name, value in required_settings.items() if not value]
    if missing:
        raise SystemExit(
            f"Missing required environment variable(s): {', '.join(missing)}"
        )

    # Authenticate and download from Google Drive
    drive_service = authenticate_google_drive(GOOGLE_CREDENTIALS_JSON)
    download_from_drive(drive_service, GDRIVE_FOLDER_ID, DOWNLOAD_FOLDER)

    # Upload to S3
    upload_to_s3(DOWNLOAD_FOLDER, S3_BUCKET_NAME, S3_UPLOAD_PATH, AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters