feat(blocks/nvidia): Add Nvidia deepfake detection block (#9213)

Adds a block that lets users detect deepfakes in their workflows. The block takes an image as input and returns the probability that it is a deepfake and, optionally, the processed image with the detection markings (bounding boxes) drawn on it. ### Changes 🏗️ - Added NvidiaDeepfakeDetectBlock - Added the ability to upload images on the frontend - Added the ability to render base64-encoded images on the frontend <img width="1001" alt="Screenshot 2025-01-07 at 2 16 42 PM" src="https://github.com/user-attachments/assets/c3d090f3-3981-4235-a66b-f8e2a3920a4d" /> ### Checklist 📋 #### For code changes: - [ ] I have clearly listed my changes in the PR description - [ ] I have made a test plan - [ ] I have tested my changes according to the test plan:  - [ ] ... <details> <summary>Example test plan</summary> - [ ] Create from scratch and execute an agent with at least 3 blocks - [ ] Import an agent from file upload, and confirm it executes correctly - [ ] Upload agent to marketplace - [ ] Import an agent from marketplace and confirm it executes correctly - [ ] Edit an agent from monitor, and confirm it executes correctly </details> #### For configuration changes: - [ ] `.env.example` is updated or already compatible with my changes - [ ] `docker-compose.yml` is updated or already compatible with my changes - [ ] I have included a list of my configuration changes in the PR description (under **Changes**) <details> <summary>Examples of configuration changes</summary> - Changing ports - Adding new services that need to communicate with each other - Secrets or environment variable changes - New or changed infrastructure such as databases </details> --------- Co-authored-by: Nicholas Tindle <[email protected]>
Significant-Gravitas · Jan 7, 2025 · b558cca · b558cca
1 parent 4115f65
commit b558cca
Show file tree

Hide file tree

Showing 10 changed files with 253 additions and 13 deletions.
diff --git a/autogpt_platform/backend/backend/blocks/nvidia/_auth.py b/autogpt_platform/backend/backend/blocks/nvidia/_auth.py
@@ -0,0 +1,32 @@
+from typing import Literal
+
+from pydantic import SecretStr
+
+from backend.data.model import APIKeyCredentials, CredentialsField, CredentialsMetaInput
+from backend.integrations.providers import ProviderName
+
+NvidiaCredentials = APIKeyCredentials  # Nvidia credentials are plain API-key credentials.
+NvidiaCredentialsInput = CredentialsMetaInput[  # Credentials input restricted to...
+    Literal[ProviderName.NVIDIA],  # ...the NVIDIA provider
+    Literal["api_key"],  # ...and the "api_key" credential type.
+]
+
+TEST_CREDENTIALS = APIKeyCredentials(  # Fixed mock credential; presumably for block tests (no usage visible in this file).
+    id="01234567-89ab-cdef-0123-456789abcdef",
+    provider="nvidia",
+    api_key=SecretStr("mock-nvidia-api-key"),  # Dummy value, not a real key.
+    title="Mock Nvidia API key",
+    expires_at=None,
+)
+
+TEST_CREDENTIALS_INPUT = {  # Metadata-only view of TEST_CREDENTIALS: the api_key itself is not included.
+    "provider": TEST_CREDENTIALS.provider,
+    "id": TEST_CREDENTIALS.id,
+    "type": TEST_CREDENTIALS.type,
+    "title": TEST_CREDENTIALS.title,
+}
+
+
+def NvidiaCredentialsField() -> NvidiaCredentialsInput:
+    """Creates an Nvidia credentials input on a block."""
+    return CredentialsField(description="The Nvidia integration requires an API Key.")
diff --git a/autogpt_platform/backend/backend/blocks/nvidia/deepfake.py b/autogpt_platform/backend/backend/blocks/nvidia/deepfake.py
@@ -0,0 +1,90 @@
+from backend.blocks.nvidia._auth import (
+    NvidiaCredentials,
+    NvidiaCredentialsField,
+    NvidiaCredentialsInput,
+)
+from backend.data.block import Block, BlockCategory, BlockOutput, BlockSchema
+from backend.data.model import SchemaField
+from backend.util.request import requests
+
+
+class NvidiaDeepfakeDetectBlock(Block):
+    class Input(BlockSchema):
+        credentials: NvidiaCredentialsInput = NvidiaCredentialsField()
+        image_base64: str = SchemaField(
+            description="Image to analyze for deepfakes", image_upload=True
+        )
+        return_image: bool = SchemaField(
+            description="Whether to return the processed image with markings",
+            default=False,
+        )
+
+    class Output(BlockSchema):
+        status: str = SchemaField(
+            description="Detection status (SUCCESS, ERROR, CONTENT_FILTERED)",
+            default="",
+        )
+        image: str = SchemaField(
+            description="Processed image with detection markings (if return_image=True)",
+            default="",
+            image_output=True,
+        )
+        is_deepfake: float = SchemaField(
+            description="Probability that the image is a deepfake (0-1)",
+            default=0.0,
+        )
+
+    def __init__(self):
+        super().__init__(
+            id="8c7d0d67-e79c-44f6-92a1-c2600c8aac7f",
+            description="Detects potential deepfakes in images using Nvidia's AI API",
+            categories={BlockCategory.SAFETY},
+            input_schema=NvidiaDeepfakeDetectBlock.Input,
+            output_schema=NvidiaDeepfakeDetectBlock.Output,
+        )
+
+    def run(
+        self, input_data: Input, *, credentials: NvidiaCredentials, **kwargs
+    ) -> BlockOutput:
+        url = "https://ai.api.nvidia.com/v1/cv/hive/deepfake-image-detection"
+
+        headers = {
+            "accept": "application/json",
+            "content-type": "application/json",
+            "Authorization": f"Bearer {credentials.api_key.get_secret_value()}",
+        }
+
+        image_data = f"data:image/jpeg;base64,{input_data.image_base64}"
+
+        payload = {
+            "input": [image_data],
+            "return_image": input_data.return_image,
+        }
+
+        try:
+            response = requests.post(url, headers=headers, json=payload)
+            response.raise_for_status()
+            data = response.json()
+
+            result = data.get("data", [{}])[0]
+
+            # Get deepfake probability from first bounding box if any
+            deepfake_prob = 0.0
+            if result.get("bounding_boxes"):
+                deepfake_prob = result["bounding_boxes"][0].get("is_deepfake", 0.0)
+
+            yield "status", result.get("status", "ERROR")
+            yield "is_deepfake", deepfake_prob
+
+            if input_data.return_image:
+                image_data = result.get("image", "")
+                output_data = f"data:image/jpeg;base64,{image_data}"
+                yield "image", output_data
+            else:
+                yield "image", ""
+
+        except Exception as e:
+            yield "error", str(e)
+            yield "status", "ERROR"
+            yield "is_deepfake", 0.0
+            yield "image", ""
diff --git a/autogpt_platform/backend/backend/data/block.py b/autogpt_platform/backend/backend/data/block.py
@@ -61,6 +61,9 @@ class BlockCategory(Enum):
     HARDWARE = "Block that interacts with hardware."
     AGENT = "Block that interacts with other agents."
     CRM = "Block that interacts with CRM services."
+    SAFETY = (
+        "Block that provides AI safety mechanisms such as detecting harmful content"
+    )
 
     def dict(self) -> dict[str, str]:
         return {"category": self.name, "description": self.value}

diff --git a/autogpt_platform/backend/backend/data/model.py b/autogpt_platform/backend/backend/data/model.py
@@ -139,6 +139,8 @@ def SchemaField(
     exclude: bool = False,
     hidden: Optional[bool] = None,
     depends_on: list[str] | None = None,
+    image_upload: Optional[bool] = None,
+    image_output: Optional[bool] = None,
     **kwargs,
 ) -> T:
     if default is PydanticUndefined and default_factory is None:
@@ -154,6 +156,8 @@ def SchemaField(
             "advanced": advanced,
             "hidden": hidden,
             "depends_on": depends_on,
+            "image_upload": image_upload,
+            "image_output": image_output,
         }.items()
         if v is not None
     }

diff --git a/autogpt_platform/backend/backend/integrations/providers.py b/autogpt_platform/backend/backend/integrations/providers.py
@@ -19,6 +19,7 @@ class ProviderName(str, Enum):
     JINA = "jina"
     MEDIUM = "medium"
     NOTION = "notion"
+    NVIDIA = "nvidia"
     OLLAMA = "ollama"
     OPENAI = "openai"
     OPENWEATHERMAP = "openweathermap"

diff --git a/autogpt_platform/frontend/src/components/integrations/credentials-input.tsx b/autogpt_platform/frontend/src/components/integrations/credentials-input.tsx
@@ -53,6 +53,7 @@ export const providerIcons: Record<
   google: FaGoogle,
   groq: fallbackIcon,
   notion: NotionLogoIcon,
+  nvidia: fallbackIcon,
   discord: FaDiscord,
   d_id: fallbackIcon,
   google_maps: FaGoogle,

diff --git a/autogpt_platform/frontend/src/components/integrations/credentials-provider.tsx b/autogpt_platform/frontend/src/components/integrations/credentials-provider.tsx
@@ -28,6 +28,7 @@ const providerDisplayNames: Record<CredentialsProviderName, string> = {
   jina: "Jina",
   medium: "Medium",
   notion: "Notion",
+  nvidia: "Nvidia",
   ollama: "Ollama",
   openai: "OpenAI",
   openweathermap: "OpenWeatherMap",

diff --git a/autogpt_platform/frontend/src/components/node-input-components.tsx b/autogpt_platform/frontend/src/components/node-input-components.tsx
@@ -101,6 +101,92 @@ const NodeObjectInputTree: FC<NodeObjectInputTreeProps> = ({
 
 export default NodeObjectInputTree;
 
+/**
+ * File-picker input for string fields flagged with `image_upload`.
+ *
+ * The chosen file is read as a data URL and only the base64 payload (everything
+ * after the first comma) is passed to `handleInputChange`, so `value` holds bare
+ * base64 without a `data:` prefix. Clearing the image writes an empty string.
+ */
+const NodeImageInput: FC<{
+  selfKey: string;
+  schema: BlockIOStringSubSchema;
+  value?: string;
+  error?: string;
+  handleInputChange: NodeObjectInputTreeProps["handleInputChange"];
+  className?: string;
+  displayName: string;
+}> = ({
+  selfKey,
+  schema,
+  value = "",
+  error,
+  handleInputChange,
+  className,
+  displayName,
+}) => {
+  const handleFileChange = useCallback(
+    (event: React.ChangeEvent<HTMLInputElement>) => {
+      const input = event.target;
+      const file = input.files?.[0];
+      // Reset the native value so that picking the same file again (e.g. after
+      // removing the image) still fires a change event. `file` stays usable.
+      input.value = "";
+      if (!file) return;
+
+      // Validate file type
+      if (!file.type.startsWith("image/")) {
+        console.error("Please upload an image file");
+        return;
+      }
+
+      // Convert to base64
+      const reader = new FileReader();
+      reader.onload = () => {
+        const dataUrl = reader.result;
+        // readAsDataURL yields a string; guard anyway so a null/odd result
+        // never writes `undefined` into the field.
+        if (typeof dataUrl !== "string") return;
+        const commaIndex = dataUrl.indexOf(",");
+        if (commaIndex === -1) return;
+        handleInputChange(selfKey, dataUrl.slice(commaIndex + 1));
+      };
+      reader.onerror = () => {
+        console.error("Failed to read image file", reader.error);
+      };
+      reader.readAsDataURL(file);
+    },
+    [selfKey, handleInputChange],
+  );
+
+  return (
+    <div className={cn("flex flex-col gap-2", className)}>
+      <div className="nodrag flex flex-col gap-2" data-image-input>
+        <div className="flex items-center gap-2">
+          {/* The file input is looked up relative to this button: its id is
+              built from `selfKey` alone, which is not guaranteed to be unique
+              in the document when several nodes render the same field. */}
+          <Button
+            variant="outline"
+            onClick={(event) =>
+              event.currentTarget
+                .closest("[data-image-input]")
+                ?.querySelector<HTMLInputElement>('input[type="file"]')
+                ?.click()
+            }
+            className="w-full"
+          >
+            {value ? "Change Image" : `Upload ${displayName}`}
+          </Button>
+          {value && (
+            <Button
+              variant="ghost"
+              className="text-red-500 hover:text-red-700"
+              onClick={() => handleInputChange(selfKey, "")}
+            >
+              <Cross2Icon className="h-4 w-4" />
+            </Button>
+          )}
+        </div>
+
+        <input
+          id={`${selfKey}-upload`}
+          type="file"
+          accept="image/*"
+          onChange={handleFileChange}
+          className="hidden"
+        />
+
+        {value && (
+          <div className="relative mt-2 rounded-md border border-gray-300 p-2 dark:border-gray-600">
+            {/* The MIME type is dropped on upload, so a fixed image/jpeg label is used. */}
+            <img
+              src={`data:image/jpeg;base64,${value}`}
+              alt="Preview"
+              className="max-h-32 w-full rounded-md object-contain"
+            />
+          </div>
+        )}
+      </div>
+      {error && <span className="error-message">{error}</span>}
+    </div>
+  );
+};
+
 const NodeDateTimeInput: FC<{
   selfKey: string;
   schema: BlockIOStringSubSchema;
@@ -418,6 +504,19 @@ export const NodeGenericInputField: FC<{
 
   switch (propSchema.type) {
     case "string":
+      if ("image_upload" in propSchema && propSchema.image_upload === true) {
+        return (
+          <NodeImageInput
+            selfKey={propKey}
+            schema={propSchema}
+            value={currentValue}
+            error={errors[propKey]}
+            className={className}
+            displayName={displayName}
+            handleInputChange={handleInputChange}
+          />
+        );
+      }
       if ("format" in propSchema && propSchema.format === "date-time") {
         return (
           <NodeDateTimeInput

diff --git a/autogpt_platform/frontend/src/components/ui/render.tsx b/autogpt_platform/frontend/src/components/ui/render.tsx
@@ -17,6 +17,9 @@ const isValidVideoUrl = (url: string): boolean => {
 };
 
 const isValidImageUrl = (url: string): boolean => {
+  if (url.startsWith("data:image/")) {
+    return true;
+  }
   const imageExtensions = /\.(jpeg|jpg|gif|png|svg|webp)$/i;
   const cleanedUrl = url.split("?")[0];
   return imageExtensions.test(cleanedUrl);
@@ -50,19 +53,21 @@ const VideoRenderer: React.FC<{ videoUrl: string }> = ({ videoUrl }) => {
   );
 };
 
-const ImageRenderer: React.FC<{ imageUrl: string }> = ({ imageUrl }) => (
-  <div className="w-full p-2">
-    <picture>
-      <img
-        src={imageUrl}
-        alt="Image"
-        className="h-auto max-w-full"
-        width="100%"
-        height="auto"
-      />
-    </picture>
-  </div>
-);
+/** Displays `imageUrl` in an `<img>` inside a padded, full-width container. */
+const ImageRenderer: React.FC<{ imageUrl: string }> = (props) => {
+  const picture = (
+    <picture>
+      <img
+        src={props.imageUrl}
+        alt="Image"
+        className="h-auto max-w-full"
+        width="100%"
+        height="auto"
+      />
+    </picture>
+  );
+
+  return <div className="w-full p-2">{picture}</div>;
+};
 
 const AudioRenderer: React.FC<{ audioUrl: string }> = ({ audioUrl }) => (
   <div className="w-full p-2">
@@ -92,6 +97,9 @@ export const ContentRenderer: React.FC<{
   truncateLongData?: boolean;
 }> = ({ value, truncateLongData }) => {
   if (typeof value === "string") {
+    if (value.startsWith("data:image/")) {
+      return <ImageRenderer imageUrl={value} />;
+    }
     if (isValidVideoUrl(value)) {
       return <VideoRenderer videoUrl={value} />;
     } else if (isValidImageUrl(value)) {

diff --git a/autogpt_platform/frontend/src/lib/autogpt-server-api/types.ts b/autogpt_platform/frontend/src/lib/autogpt-server-api/types.ts
@@ -113,6 +113,7 @@ export const PROVIDER_NAMES = {
   JINA: "jina",
   MEDIUM: "medium",
   NOTION: "notion",
+  NVIDIA: "nvidia",
   OLLAMA: "ollama",
   OPENAI: "openai",
   OPENWEATHERMAP: "openweathermap",