Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Render first rows video urls #3095

Merged
merged 2 commits into from
Oct 29, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion libs/libapi/src/libapi/rows_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,7 @@ async def transform_rows(
offset=offset,
row_idx_column=row_idx_column,
)
if "Audio(" in str(features) or "Image(" in str(features) or "Video(" in str(features):
if "Audio(" in str(features) or "Image(" in str(features):
# Use multithreading to parallelize image/audio files uploads.
# Also multithreading is ok to convert audio data
# (we use pydub which might spawn one ffmpeg process per conversion, which releases the GIL)
Expand Down
7 changes: 5 additions & 2 deletions libs/libcommon/src/libcommon/url_preparator.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
from dataclasses import dataclass
from typing import Any, Callable, Literal, Optional, Union

from datasets import Audio, Features, Image
from datasets import Audio, Features, Image, Video
from datasets.features.features import FeatureType, Sequence

from libcommon.cloudfront import CloudFrontSigner
Expand All @@ -23,7 +23,7 @@ class InvalidFirstRowsError(ValueError):

@dataclass
class AssetUrlPath:
feature_type: Literal["Audio", "Image"]
feature_type: Literal["Audio", "Image", "Video"]
path: VisitPath

def enter(self) -> "AssetUrlPath":
Expand Down Expand Up @@ -70,7 +70,10 @@ def classify(feature: FeatureType, visit_path: VisitPath) -> None:
if isinstance(feature, Image):
asset_url_paths.append(AssetUrlPath(feature_type="Image", path=visit_path))
elif isinstance(feature, Audio):
# for audio we give a list in case there are multiple formats available
asset_url_paths.append(AssetUrlPath(feature_type="Audio", path=visit_path + [0]))
elif isinstance(feature, Video):
asset_url_paths.append(AssetUrlPath(feature_type="Video", path=visit_path))

_visit(feature, classify, [column])
return asset_url_paths
Expand Down
Loading