Commit

Init
antares-sw committed Jan 10, 2025
1 parent 910de29 commit 4a65ba4
Showing 14 changed files with 827 additions and 0 deletions.
11 changes: 11 additions & 0 deletions .dockerignore
@@ -0,0 +1,11 @@
.venv
__pycache__
tmp/
tmp2/
.vscode
config.toml
google-credentials.json
.git
.gitignore
.dockerignore
README.md
37 changes: 37 additions & 0 deletions .github/workflows/docker.yaml
@@ -0,0 +1,37 @@
name: Docker

on:
  push:

jobs:
  docker:
    name: Build Docker Image
    runs-on: ubuntu-latest
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
      - name: Docker meta
        id: meta
        uses: docker/metadata-action@v5
        with:
          images: |
            europe-west4-docker.pkg.dev/stakewiselabs/public/node-snapshots
          tags: |
            type=ref,event=branch
            type=ref,event=tag
            type=sha
      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v3
      - name: Login to GAR
        uses: docker/login-action@v3
        with:
          registry: europe-west4-docker.pkg.dev
          username: _json_key
          password: ${{ secrets.GAR_JSON_KEY }}
      - name: Build and push
        uses: docker/build-push-action@v5
        with:
          push: ${{ github.event_name != 'pull_request' }}
          tags: ${{ steps.meta.outputs.tags }}
          labels: ${{ steps.meta.outputs.labels }}
          platforms: linux/amd64
7 changes: 7 additions & 0 deletions .gitignore
@@ -0,0 +1,7 @@
.venv
__pycache__
tmp/
tmp2/
.vscode
config.toml
google-credentials.json
1 change: 1 addition & 0 deletions .python-version
@@ -0,0 +1 @@
3.12
28 changes: 28 additions & 0 deletions Dockerfile
@@ -0,0 +1,28 @@
# Use a Python image with uv pre-installed
FROM ghcr.io/astral-sh/uv:python3.12-bookworm-slim

# Install the project into `/app`
WORKDIR /app

# Enable bytecode compilation
ENV UV_COMPILE_BYTECODE=1

# Copy from the cache instead of linking since it's a mounted volume
ENV UV_LINK_MODE=copy

COPY pyproject.toml .
COPY uv.lock .

# Install the project's dependencies using the lockfile and settings
RUN uv sync --frozen --no-install-project --no-dev

# Then, add the rest of the project source code and install it
# Installing separately from its dependencies allows optimal layer caching
ADD . /app
RUN uv sync --frozen --no-dev

# Place executables in the environment at the front of the path
ENV PATH="/app/.venv/bin:$PATH"

# Run commands through `uv run` so they execute inside the project environment
ENTRYPOINT ["uv", "run"]
101 changes: 101 additions & 0 deletions README.md
@@ -1,2 +1,103 @@
# node-snapshots

Stores snapshots of Ethereum execution and consensus clients

## Overview

This tool manages snapshots of Ethereum/Gnosis nodes running in Docker containers: it backs up specified data directories, uploads the backups to Google Cloud Storage (GCS), and handles the recovery process. Multiple node backups can run in parallel for efficiency.

## Features

- **Node Snapshot**: Create tarballs of specified directories, stopping each container during archiving so the data is consistent.
- **Upload to Google Cloud Storage**: Upload the created tarballs to a specified GCS bucket.
- **Parallel Execution**: Take snapshots of multiple containers in parallel.
- **Recovery**: Download and extract snapshots from GCS to recover node data.

## Prerequisites

- **Docker**: Ensure Docker is installed and running on your machine.
- **Google Cloud Storage**: You need a Google Cloud project with a GCS bucket and service account credentials.
- **Python 3.12+**: The script uses Python 3 and requires the installation of the necessary libraries.

## Configuration

### Configuration File (`config.toml`)

This tool uses a `.toml` configuration file to specify the Docker containers to back up, as well as the Google Cloud Storage details. Here’s an example configuration:

```toml
# Docker host configuration
docker_host = "unix:///var/run/docker.sock"

# Google Cloud service account credentials
google_credentials_json = "google-credentials.json"

# Google Cloud Storage bucket name
bucket_name = "node-snapshots-bucket"

# Docker containers and their paths for snapshot management
[[docker_containers]]
container_name = "nethermind"
data_path = "/data/nethermind"
tar_name = "nethermind.tar"
bucket_path = "mainnet/nethermind.tar"
recovery_path = "/data/recovered/nethermind"

[[docker_containers]]
container_name = "lighthouse"
data_path = "/data/lighthouse"
tar_name = "lighthouse.tar"
bucket_path = "mainnet/lighthouse.tar"
recovery_path = "/data/recovered/lighthouse"
```

### Configuration Fields

- **docker_host**: The location of your Docker daemon (e.g., `"unix:///var/run/docker.sock"` for Linux or macOS).
- **google_credentials_json**: Path to the Google Cloud service account credentials JSON file.
- **bucket_name**: The name of your GCS bucket to upload snapshots to.
- **docker_containers**:
  - **container_name**: The name of the Docker container to back up.
  - **data_path**: Path inside the container to back up (relative or absolute).
  - **tar_name**: Name of the tarball file to create.
  - **bucket_path**: Path in the GCS bucket where the tarball will be uploaded.
  - **recovery_path**: Path on the local filesystem where the tarball will be extracted during recovery.
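
These fields are parsed into the `Settings` object defined in `settings.py` (included in this commit). A minimal sketch of reading them, assuming a `config.toml` next to the script:

```python
from settings import Settings

settings = Settings(config_path="config.toml")
print(settings.bucket_name)  # e.g. "node-snapshots-bucket"
for container in settings.docker_containers:
    print(container["container_name"], "->", container["bucket_path"])
```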

## Usage

### 1. Take Snapshots of Docker Containers

Run the tool to take snapshots of the Docker containers specified in the configuration file. This will create tarballs of the `data_path` directories, upload them to GCS, and clean up after the operation.

```bash
python snapshot.py --config path_to_your_config.toml
```

### 2. Recovery of Docker Containers

To recover the data from the snapshot, the tool can download the tarball from GCS and extract it to the specified `recovery_path`.

The script will:

1. Check if the recovery directory exists.
2. If the directory is not empty, ask for confirmation before deleting the existing files.
3. Download the tarball from GCS.
4. Extract the tarball to the `recovery_path`.

To trigger the recovery process, run the recovery script; it will download and extract the snapshots automatically.

```bash
python recovery.py --config path_to_your_config.toml
```

## Example Workflow

1. **Configuration**: Set up the `config.toml` file with the paths to Docker containers and the corresponding Google Cloud Storage bucket details.
2. **Run Backup**: Run the backup script to create tarballs of the Docker container data and upload them to Google Cloud Storage.
3. **Recovery**: Run the recovery script to download and extract the snapshot tarballs into the desired recovery directories.

## Troubleshooting

- **Google Cloud Authentication**: Make sure the `google-credentials.json` file points to a valid Google Cloud service account with the necessary permissions to access your GCS bucket.
- **Docker Issues**: Ensure that the Docker daemon is running and that the container names in the configuration file match the actual running containers.
- **Permission Issues**: Verify that the user running the script has permission to write to the specified paths (e.g., the local recovery path and the GCS bucket).
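
Before running a full backup, both connections can be sanity-checked with a few lines of Python. This is a minimal sketch using the same libraries the tool depends on; the socket path, credentials file, and bucket name are placeholders taken from the example config:

```python
import docker
from google.cloud import storage
from google.oauth2 import service_account

# Verify the Docker daemon is reachable
docker_client = docker.DockerClient(base_url="unix:///var/run/docker.sock")
print("Docker OK:", docker_client.ping())

# Verify the service account can see the bucket
credentials = service_account.Credentials.from_service_account_file("google-credentials.json")
storage_client = storage.Client(credentials=credentials)
print("Bucket exists:", storage_client.bucket("node-snapshots-bucket").exists())
```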
23 changes: 23 additions & 0 deletions config.toml.example
@@ -0,0 +1,23 @@
# Docker host configuration
docker_host = "unix:///var/run/docker.sock"

# Google Cloud service account credentials
google_credentials_json = "google-credentials.json"

# Google Cloud Storage bucket name
bucket_name = "node-snapshots"

# Docker containers and their paths for snapshot management
[[docker_containers]]
container_name = "nethermind"
data_path = "/data/nethermind"
tar_name = "nethermind.tar"
bucket_path = "mainnet/nethermind.tar"
recovery_path = "/data/recovered/nethermind"

[[docker_containers]]
container_name = "lighthouse"
data_path = "/data/lighthouse"
tar_name = "lighthouse.tar"
bucket_path = "mainnet/lighthouse.tar"
recovery_path = "/data/recovered/lighthouse"
15 changes: 15 additions & 0 deletions containers.py
@@ -0,0 +1,15 @@
import docker

def stop(docker_client: docker.DockerClient, container_names: list[str]) -> None:
    """Stop the Docker containers by name."""
    for container_name in container_names:
        container = docker_client.containers.get(container_name)
        container.stop()
        print(f"Container {container_name} stopped.")


def start(docker_client: docker.DockerClient, container_names: list[str]) -> None:
    """Start the Docker containers by name."""
    for container_name in container_names:
        container = docker_client.containers.get(container_name)
        container.start()
        print(f"Container {container_name} started.")
12 changes: 12 additions & 0 deletions pyproject.toml
@@ -0,0 +1,12 @@
[project]
name = "node-snapshot"
version = "0.1.0"
description = "Add your description here"
readme = "README.md"
requires-python = ">=3.12"
dependencies = [
"docker>=7.1.0",
"google-cloud-storage>=2.19.0",
"toml>=0.10.2",
"tqdm>=4.67.1",
]
87 changes: 87 additions & 0 deletions recovery.py
@@ -0,0 +1,87 @@
import os
import shutil
from google.cloud import storage
from google.oauth2 import service_account
import docker
from settings import Settings, parse_arguments
from storage import download_from_bucket, extract_tar
from concurrent.futures import ThreadPoolExecutor


def verify_or_create_directory(recovery_path: str) -> bool:
    """Verify if the recovery directory exists, create it if not, or ask for confirmation to delete contents."""
    if not os.path.exists(recovery_path):
        os.makedirs(recovery_path)
        print(f"Created directory: {recovery_path}")
    else:
        if os.listdir(recovery_path):
            # If the directory is not empty, ask for confirmation to delete its contents
            confirm = input(f"The directory {recovery_path} is not empty. Do you want to delete its contents and recover from the snapshot? (y/n): ")
            if confirm.lower() == 'y':
                for filename in os.listdir(recovery_path):
                    file_path = os.path.join(recovery_path, filename)
                    try:
                        if os.path.isdir(file_path):
                            shutil.rmtree(file_path)
                        else:
                            os.remove(file_path)
                    except Exception as e:
                        print(f"Error removing {file_path}: {e}")
                print(f"Deleted all contents of {recovery_path}.")
            else:
                print(f"Skipping recovery for {recovery_path}.")
                return False
        else:
            print(f"The directory {recovery_path} is empty. Proceeding with recovery.")
    return True

def recover_snapshot(storage_client: storage.Client, settings: Settings, container_config: dict[str, str]) -> None:
    """Download the snapshot tar file and extract it to the recovery path."""

    tar_name = container_config['tar_name']
    bucket_path = container_config['bucket_path']
    recovery_path = container_config['recovery_path']

    # Step 1: Verify or create the recovery directory and delete contents if necessary
    if not verify_or_create_directory(recovery_path):
        return  # Skip this container's recovery if directory isn't ready

    # Step 2: Download the snapshot tar file from Google Cloud Storage
    tar_path = f"./{tar_name}"
    download_from_bucket(storage_client, settings.bucket_name, bucket_path, tar_path)

    # Step 3: Extract the tarball to the recovery path
    extract_tar(tar_path, recovery_path)

    # Step 4: Clean up the tar file after recovery
    os.remove(tar_path)
    print(f"Cleaned up tar file: {tar_path}")

def main() -> None:
    """Main function to parse arguments, initialize clients, and recover snapshots in parallel."""
    args = parse_arguments()
    settings = Settings(config_path=args.config)

    credentials = service_account.Credentials.from_service_account_file(
        settings.google_credentials_json
    )
    storage_client = storage.Client(credentials=credentials)
    docker_client = docker.DockerClient(base_url=settings.docker_host)

    containers_config = settings.docker_containers

    # Recover snapshot for each configured container
    with ThreadPoolExecutor() as executor:
        futures = []
        for container_config in containers_config:
            futures.append(executor.submit(recover_snapshot, storage_client, settings, container_config))

        for future in futures:
            future.result()

    print("All snapshots recovered.")


if __name__ == "__main__":
    main()
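
The `download_from_bucket` and `extract_tar` helpers imported above live in `storage.py`, which is among the changed files but not rendered here. A minimal sketch of what such helpers could look like, assuming the standard `google-cloud-storage` and `tarfile` APIs (not necessarily the repository's actual implementation):

```python
import tarfile

from google.cloud import storage


def download_from_bucket(client: storage.Client, bucket_name: str,
                         bucket_path: str, dest_path: str) -> None:
    """Download a blob from the GCS bucket to a local file."""
    blob = client.bucket(bucket_name).blob(bucket_path)
    blob.download_to_filename(dest_path)


def extract_tar(tar_path: str, dest_dir: str) -> None:
    """Extract a tarball into the destination directory."""
    with tarfile.open(tar_path) as tar:
        tar.extractall(path=dest_dir)
```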
46 changes: 46 additions & 0 deletions settings.py
@@ -0,0 +1,46 @@
import argparse
import toml
import os

# Define the configuration class to hold settings
class Settings:
    def __init__(self, config_path: str):
        self.config_path = config_path
        self.google_credentials_json = None
        self.bucket_name = None
        self.docker_host = None
        self.docker_containers = []

        self.load_config()

    def load_config(self):
        """Load configuration from the provided TOML file."""
        try:
            # Parse the TOML file
            config = toml.load(self.config_path)

            # Extract values into the Settings object
            self.google_credentials_json = config.get("google_credentials_json")
            self.bucket_name = config.get("bucket_name")
            self.docker_host = config.get("docker_host")
            self.docker_containers = config.get("docker_containers", [])

            # Ensure the credentials path is set and the file exists
            if not self.google_credentials_json or not os.path.isfile(self.google_credentials_json):
                raise ValueError(f"Google credentials file does not exist: {self.google_credentials_json}")

        except Exception as e:
            print(f"Error loading configuration: {e}")
            raise

    def __repr__(self):
        """Helper method to display loaded settings."""
        return f"Settings(google_credentials_json={self.google_credentials_json}, " \
               f"bucket_name={self.bucket_name}, docker_containers={self.docker_containers})"


# Argument parsing
def parse_arguments():
    parser = argparse.ArgumentParser(description="Load configuration and interact with Google Cloud Storage.")
    parser.add_argument("-c", "--config", required=True, help="Path to the configuration file")
    return parser.parse_args()