Skip to content

Commit

Permalink
feat(blocks/nvidia): Add Nvidia deepfake detection block (#9213)
Browse files Browse the repository at this point in the history
Adds a block that lets users detect deepfakes in their workflows.
The block takes an image as input and returns the detection status, the
probability that the image is a deepfake and, optionally, the processed image with bounding-box markings.

### Changes 🏗️

- Added NvidiaDeepfakeDetectBlock
- Added the ability to upload images on the frontend
- Added the ability to render base64 encoded images on the frontend
<img width="1001" alt="Screenshot 2025-01-07 at 2 16 42 PM"
src="https://github.com/user-attachments/assets/c3d090f3-3981-4235-a66b-f8e2a3920a4d"
/>

### Checklist 📋

#### For code changes:
- [ ] I have clearly listed my changes in the PR description
- [ ] I have made a test plan
- [ ] I have tested my changes according to the test plan:
  <!-- Put your test plan here: -->
  - [ ] ...

<details>
  <summary>Example test plan</summary>
  
  - [ ] Create from scratch and execute an agent with at least 3 blocks
- [ ] Import an agent from file upload, and confirm it executes
correctly
  - [ ] Upload agent to marketplace
- [ ] Import an agent from marketplace and confirm it executes correctly
  - [ ] Edit an agent from monitor, and confirm it executes correctly
</details>

#### For configuration changes:
- [ ] `.env.example` is updated or already compatible with my changes
- [ ] `docker-compose.yml` is updated or already compatible with my
changes
- [ ] I have included a list of my configuration changes in the PR
description (under **Changes**)

<details>
  <summary>Examples of configuration changes</summary>

  - Changing ports
  - Adding new services that need to communicate with each other
  - Secrets or environment variable changes
  - New or changed infrastructure, such as databases
</details>

---------

Co-authored-by: Nicholas Tindle <[email protected]>
  • Loading branch information
aarushik93 and ntindle authored Jan 7, 2025
1 parent 4115f65 commit b558cca
Show file tree
Hide file tree
Showing 10 changed files with 253 additions and 13 deletions.
32 changes: 32 additions & 0 deletions autogpt_platform/backend/backend/blocks/nvidia/_auth.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
from typing import Literal

from pydantic import SecretStr

from backend.data.model import APIKeyCredentials, CredentialsField, CredentialsMetaInput
from backend.integrations.providers import ProviderName

# The Nvidia integration authenticates with an API key, so the generic
# API-key credentials model is reused under a provider-specific alias.
NvidiaCredentials = APIKeyCredentials
# Credentials-input type pinned to the NVIDIA provider and the "api_key"
# credential type.
NvidiaCredentialsInput = CredentialsMetaInput[
Literal[ProviderName.NVIDIA],
Literal["api_key"],
]

# Mock Nvidia credentials holding a placeholder key (not a real secret).
# NOTE(review): presumably consumed by block tests -- confirm against callers.
TEST_CREDENTIALS = APIKeyCredentials(
id="01234567-89ab-cdef-0123-456789abcdef",
provider="nvidia",
api_key=SecretStr("mock-nvidia-api-key"),
title="Mock Nvidia API key",
expires_at=None,
)

# Metadata-only view of TEST_CREDENTIALS: copies provider, id, type and title,
# and deliberately leaves out the api_key itself.
TEST_CREDENTIALS_INPUT = {
"provider": TEST_CREDENTIALS.provider,
"id": TEST_CREDENTIALS.id,
"type": TEST_CREDENTIALS.type,
"title": TEST_CREDENTIALS.title,
}


def NvidiaCredentialsField() -> NvidiaCredentialsInput:
    """Build the credentials input field for Nvidia blocks.

    Returns:
        The result of ``CredentialsField`` configured with the description
        shown for the Nvidia integration.
    """
    nvidia_field = CredentialsField(
        description="The Nvidia integration requires an API Key."
    )
    return nvidia_field
90 changes: 90 additions & 0 deletions autogpt_platform/backend/backend/blocks/nvidia/deepfake.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
from backend.blocks.nvidia._auth import (
NvidiaCredentials,
NvidiaCredentialsField,
NvidiaCredentialsInput,
)
from backend.data.block import Block, BlockCategory, BlockOutput, BlockSchema
from backend.data.model import SchemaField
from backend.util.request import requests


class NvidiaDeepfakeDetectBlock(Block):
    """Block that checks an image for deepfake manipulation via Nvidia's API.

    The image is posted to the Hive deepfake-image-detection endpoint. The
    block emits the detection status, the deepfake probability of the first
    reported bounding box and, when requested, the annotated image.
    """

    # NOTE: the schema classes are documented with comments rather than
    # docstrings so the generated schemas stay exactly as they were.

    # Inputs: API credentials, the image to analyze, and whether the
    # annotated image should be returned.
    class Input(BlockSchema):
        credentials: NvidiaCredentialsInput = NvidiaCredentialsField()
        image_base64: str = SchemaField(
            description="Image to analyze for deepfakes", image_upload=True
        )
        return_image: bool = SchemaField(
            description="Whether to return the processed image with markings",
            default=False,
        )

    # Outputs: detection status, optional annotated image (as a data URL),
    # and the deepfake probability.
    class Output(BlockSchema):
        status: str = SchemaField(
            description="Detection status (SUCCESS, ERROR, CONTENT_FILTERED)",
            default="",
        )
        image: str = SchemaField(
            description="Processed image with detection markings (if return_image=True)",
            default="",
            image_output=True,
        )
        is_deepfake: float = SchemaField(
            description="Probability that the image is a deepfake (0-1)",
            default=0.0,
        )

    def __init__(self):
        super().__init__(
            id="8c7d0d67-e79c-44f6-92a1-c2600c8aac7f",
            description="Detects potential deepfakes in images using Nvidia's AI API",
            categories={BlockCategory.SAFETY},
            input_schema=NvidiaDeepfakeDetectBlock.Input,
            output_schema=NvidiaDeepfakeDetectBlock.Output,
        )

    def run(
        self, input_data: Input, *, credentials: NvidiaCredentials, **kwargs
    ) -> BlockOutput:
        """Send the image to the detection endpoint and yield the results.

        Args:
            input_data: Block input holding the image (raw base64 or a full
                ``data:`` URL) and the ``return_image`` flag.
            credentials: Nvidia API-key credentials used as the bearer token.
            **kwargs: Unused; accepted for compatibility with the caller.

        Yields:
            ``("status", str)``, ``("is_deepfake", float)`` and
            ``("image", str)``. ``image`` is a JPEG data URL when an annotated
            image was requested and returned, otherwise an empty string.
            If anything raises, ``("error", str)`` is yielded first, followed
            by ``status="ERROR"``, ``is_deepfake=0.0`` and ``image=""``.
        """
        url = "https://ai.api.nvidia.com/v1/cv/hive/deepfake-image-detection"

        headers = {
            "accept": "application/json",
            "content-type": "application/json",
            "Authorization": f"Bearer {credentials.api_key.get_secret_value()}",
        }

        # Accept raw base64 (what the upload widget sends) as well as a
        # complete data URL (what this block emits on `image`), so a value
        # that already carries a prefix is not prefixed a second time.
        image_data = input_data.image_base64
        if not image_data.startswith("data:"):
            image_data = f"data:image/jpeg;base64,{image_data}"

        payload = {
            "input": [image_data],
            "return_image": input_data.return_image,
        }

        try:
            response = requests.post(url, headers=headers, json=payload)
            response.raise_for_status()
            data = response.json()

            # Only the first entry of the response's "data" list is inspected.
            result = data.get("data", [{}])[0]

            # Get deepfake probability from first bounding box if any
            deepfake_prob = 0.0
            if result.get("bounding_boxes"):
                deepfake_prob = result["bounding_boxes"][0].get("is_deepfake", 0.0)

            yield "status", result.get("status", "ERROR")
            yield "is_deepfake", deepfake_prob

            # Only build a data URL when there is actual image data; an empty
            # "data:image/jpeg;base64," URL would be rendered as a broken image.
            processed_image = ""
            if input_data.return_image:
                returned_image = result.get("image", "")
                if returned_image:
                    processed_image = f"data:image/jpeg;base64,{returned_image}"
            yield "image", processed_image

        except Exception as e:
            # Block boundary: report the failure and still emit every output
            # with a neutral value so downstream consumers receive all pins.
            yield "error", str(e)
            yield "status", "ERROR"
            yield "is_deepfake", 0.0
            yield "image", ""
3 changes: 3 additions & 0 deletions autogpt_platform/backend/backend/data/block.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,9 @@ class BlockCategory(Enum):
HARDWARE = "Block that interacts with hardware."
AGENT = "Block that interacts with other agents."
CRM = "Block that interacts with CRM services."
SAFETY = (
"Block that provides AI safety mechanisms such as detecting harmful content"
)

def dict(self) -> dict[str, str]:
return {"category": self.name, "description": self.value}
Expand Down
4 changes: 4 additions & 0 deletions autogpt_platform/backend/backend/data/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -139,6 +139,8 @@ def SchemaField(
exclude: bool = False,
hidden: Optional[bool] = None,
depends_on: list[str] | None = None,
image_upload: Optional[bool] = None,
image_output: Optional[bool] = None,
**kwargs,
) -> T:
if default is PydanticUndefined and default_factory is None:
Expand All @@ -154,6 +156,8 @@ def SchemaField(
"advanced": advanced,
"hidden": hidden,
"depends_on": depends_on,
"image_upload": image_upload,
"image_output": image_output,
}.items()
if v is not None
}
Expand Down
1 change: 1 addition & 0 deletions autogpt_platform/backend/backend/integrations/providers.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ class ProviderName(str, Enum):
JINA = "jina"
MEDIUM = "medium"
NOTION = "notion"
NVIDIA = "nvidia"
OLLAMA = "ollama"
OPENAI = "openai"
OPENWEATHERMAP = "openweathermap"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@ export const providerIcons: Record<
google: FaGoogle,
groq: fallbackIcon,
notion: NotionLogoIcon,
nvidia: fallbackIcon,
discord: FaDiscord,
d_id: fallbackIcon,
google_maps: FaGoogle,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ const providerDisplayNames: Record<CredentialsProviderName, string> = {
jina: "Jina",
medium: "Medium",
notion: "Notion",
nvidia: "Nvidia",
ollama: "Ollama",
openai: "OpenAI",
openweathermap: "OpenWeatherMap",
Expand Down
99 changes: 99 additions & 0 deletions autogpt_platform/frontend/src/components/node-input-components.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,92 @@ const NodeObjectInputTree: FC<NodeObjectInputTreeProps> = ({

export default NodeObjectInputTree;

/**
 * Image upload input for a string field flagged with `image_upload`.
 *
 * Reads the chosen file as a data URL, strips the `data:<mime>;base64,`
 * prefix and reports the bare base64 payload through `handleInputChange`.
 * Shows a preview and a clear button while a value is set.
 */
const NodeImageInput: FC<{
  selfKey: string;
  schema: BlockIOStringSubSchema;
  value?: string;
  error?: string;
  handleInputChange: NodeObjectInputTreeProps["handleInputChange"];
  className?: string;
  displayName: string;
}> = ({
  selfKey,
  value = "",
  error,
  handleInputChange,
  className,
  displayName,
}) => {
  const handleFileChange = useCallback(
    (event: React.ChangeEvent<HTMLInputElement>) => {
      const input = event.target;
      const file = input.files?.[0];
      // Reset the native input so that choosing the same file again (e.g.
      // after clearing the image) still fires a `change` event. The File
      // reference captured above stays usable after the reset.
      input.value = "";
      if (!file) return;

      // Validate file type
      if (!file.type.startsWith("image/")) {
        console.error("Please upload an image file");
        return;
      }

      // Convert to base64
      const reader = new FileReader();
      reader.onload = () => {
        const dataUrl = reader.result;
        if (typeof dataUrl !== "string") return;
        // Keep only the payload after the "data:<mime>;base64," prefix.
        const base64String = dataUrl.split(",")[1] ?? "";
        handleInputChange(selfKey, base64String);
      };
      reader.onerror = () => {
        console.error("Failed to read the selected image file", reader.error);
      };
      reader.readAsDataURL(file);
    },
    [selfKey, handleInputChange],
  );

  return (
    <div className={cn("flex flex-col gap-2", className)}>
      <div className="nodrag flex flex-col gap-2">
        <div className="flex items-center gap-2">
          <Button
            variant="outline"
            onClick={() =>
              document.getElementById(`${selfKey}-upload`)?.click()
            }
            className="w-full"
          >
            {value ? "Change Image" : `Upload ${displayName}`}
          </Button>
          {value && (
            <Button
              variant="ghost"
              className="text-red-500 hover:text-red-700"
              onClick={() => handleInputChange(selfKey, "")}
            >
              <Cross2Icon className="h-4 w-4" />
            </Button>
          )}
        </div>

        {/* Hidden native picker, opened by the button above. */}
        <input
          id={`${selfKey}-upload`}
          type="file"
          accept="image/*"
          onChange={handleFileChange}
          className="hidden"
        />

        {value && (
          <div className="relative mt-2 rounded-md border border-gray-300 p-2 dark:border-gray-600">
            <img
              src={`data:image/jpeg;base64,${value}`}
              alt="Preview"
              className="max-h-32 w-full rounded-md object-contain"
            />
          </div>
        )}
      </div>
      {error && <span className="error-message">{error}</span>}
    </div>
  );
};

const NodeDateTimeInput: FC<{
selfKey: string;
schema: BlockIOStringSubSchema;
Expand Down Expand Up @@ -418,6 +504,19 @@ export const NodeGenericInputField: FC<{

switch (propSchema.type) {
case "string":
if ("image_upload" in propSchema && propSchema.image_upload === true) {
return (
<NodeImageInput
selfKey={propKey}
schema={propSchema}
value={currentValue}
error={errors[propKey]}
className={className}
displayName={displayName}
handleInputChange={handleInputChange}
/>
);
}
if ("format" in propSchema && propSchema.format === "date-time") {
return (
<NodeDateTimeInput
Expand Down
34 changes: 21 additions & 13 deletions autogpt_platform/frontend/src/components/ui/render.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,9 @@ const isValidVideoUrl = (url: string): boolean => {
};

const isValidImageUrl = (url: string): boolean => {
if (url.startsWith("data:image/")) {
return true;
}
const imageExtensions = /\.(jpeg|jpg|gif|png|svg|webp)$/i;
const cleanedUrl = url.split("?")[0];
return imageExtensions.test(cleanedUrl);
Expand Down Expand Up @@ -50,19 +53,21 @@ const VideoRenderer: React.FC<{ videoUrl: string }> = ({ videoUrl }) => {
);
};

const ImageRenderer: React.FC<{ imageUrl: string }> = ({ imageUrl }) => (
<div className="w-full p-2">
<picture>
<img
src={imageUrl}
alt="Image"
className="h-auto max-w-full"
width="100%"
height="auto"
/>
</picture>
</div>
);
// Renders the given image URL (including `data:` URLs) at full container
// width, wrapped in a padded block.
const ImageRenderer: React.FC<{ imageUrl: string }> = (props) => {
  const image = (
    <img
      src={props.imageUrl}
      alt="Image"
      className="h-auto max-w-full"
      width="100%"
      height="auto"
    />
  );

  return (
    <div className="w-full p-2">
      <picture>{image}</picture>
    </div>
  );
};

const AudioRenderer: React.FC<{ audioUrl: string }> = ({ audioUrl }) => (
<div className="w-full p-2">
Expand Down Expand Up @@ -92,6 +97,9 @@ export const ContentRenderer: React.FC<{
truncateLongData?: boolean;
}> = ({ value, truncateLongData }) => {
if (typeof value === "string") {
if (value.startsWith("data:image/")) {
return <ImageRenderer imageUrl={value} />;
}
if (isValidVideoUrl(value)) {
return <VideoRenderer videoUrl={value} />;
} else if (isValidImageUrl(value)) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -113,6 +113,7 @@ export const PROVIDER_NAMES = {
JINA: "jina",
MEDIUM: "medium",
NOTION: "notion",
NVIDIA: "nvidia",
OLLAMA: "ollama",
OPENAI: "openai",
OPENWEATHERMAP: "openweathermap",
Expand Down

0 comments on commit b558cca

Please sign in to comment.