[Feat] Add ModelScope download for video datasets (open-compass#623)
* add modelscope download

* fix lint

* fix lint

* [Improvement] Update

---------

Co-authored-by: Haodong Duan <[email protected]>
Yunnglin and kennymckormick authored Nov 25, 2024
1 parent 82b631a commit d590830
Showing 8 changed files with 108 additions and 34 deletions.
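
All eight files hinge on the same switch: the new `modelscope_flag_set()` helper (added to `vlmeval/smp/misc.py` in the last hunk below) reads the `VLMEVALKIT_USE_MODELSCOPE` environment variable. A minimal opt-in sketch, assuming a standard `vlmeval` install:

```python
import os

# Opt in before any downloads are triggered; the helper accepts '1' or 'True'.
os.environ['VLMEVALKIT_USE_MODELSCOPE'] = '1'

from vlmeval.smp.misc import modelscope_flag_set

print(modelscope_flag_set())  # True: video datasets will be pulled from ModelScope
```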
9 changes: 8 additions & 1 deletion vlmeval/dataset/longvideobench.py
@@ -121,6 +121,9 @@ def check_integrity(pth):
return False
return True

if modelscope_flag_set():
repo_id = "AI-ModelScope/LongVideoBench"

cache_path = get_cache_path(repo_id)
if cache_path is not None and check_integrity(cache_path):
dataset_path = cache_path
@@ -137,7 +140,11 @@ def generate_tsv(pth):

data_file.to_csv(osp.join(pth, f'{dataset_name}.tsv'), sep='\t', index=False)

snapshot_download(repo_id=repo_id, repo_type='dataset')
if modelscope_flag_set():
from modelscope import dataset_snapshot_download
dataset_snapshot_download(dataset_id=repo_id)
else:
snapshot_download(repo_id=repo_id, repo_type='dataset')
print("All videos are downloaded for LongVideoBench")

if not glob(osp.join(cache_path, "videos")):
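
The hunk above is the template repeated across the other dataset files: swap `repo_id` to the ModelScope mirror when the flag is set, reuse a verified local cache, and otherwise download via ModelScope's `dataset_snapshot_download` or Hugging Face's `snapshot_download`. A condensed sketch of that pattern; the `fetch_dataset` helper and the placeholder Hugging Face id are illustrative, not part of the commit:

```python
from vlmeval.smp.misc import get_cache_path, modelscope_flag_set


def fetch_dataset(hf_repo_id, ms_repo_id, revision=None):
    # Prefer the ModelScope mirror when VLMEVALKIT_USE_MODELSCOPE is set.
    repo_id = ms_repo_id if modelscope_flag_set() else hf_repo_id
    cache_path = get_cache_path(repo_id)
    if cache_path is not None:
        return cache_path  # integrity checks are dataset-specific and omitted here
    kwargs = {} if revision is None else {'revision': revision}
    if modelscope_flag_set():
        from modelscope import dataset_snapshot_download
        return dataset_snapshot_download(dataset_id=repo_id, **kwargs)
    from huggingface_hub import snapshot_download
    return snapshot_download(repo_id=repo_id, repo_type='dataset', **kwargs)


# e.g. LongVideoBench; '<hf-repo-id>' stands in for the id the file already uses
path = fetch_dataset('<hf-repo-id>', 'AI-ModelScope/LongVideoBench')
```

Keeping the flag check inline in each dataset class, rather than in a shared helper like this, lets every dataset pin its own mirror id and revision.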
28 changes: 22 additions & 6 deletions vlmeval/dataset/mlvu.py
@@ -95,6 +95,9 @@ def check_integrity(pth):
return False
return True

if modelscope_flag_set():
repo_id = "AI-ModelScope/MLVU"

cache_path = get_cache_path(repo_id)
if cache_path is not None and check_integrity(cache_path):
dataset_path = cache_path
@@ -123,9 +126,14 @@ def generate_tsv(pth):
data_df = data_df.assign(index=range(len(data_df)))
data_df.to_csv(data_file, sep='\t', index=False)

hf_token = os.environ.get('HUGGINGFACE_TOKEN')
huggingface_hub.login(hf_token)
dataset_path = snapshot_download(repo_id=repo_id, repo_type='dataset')
if modelscope_flag_set():
from modelscope import dataset_snapshot_download
dataset_path = dataset_snapshot_download(dataset_id=repo_id)
else:
hf_token = os.environ.get('HUGGINGFACE_TOKEN')
huggingface_hub.login(hf_token)
dataset_path = snapshot_download(repo_id=repo_id, repo_type='dataset')

generate_tsv(dataset_path)

data_file = osp.join(dataset_path, f'{dataset_name}.tsv')
@@ -298,6 +306,9 @@ def check_integrity(pth):
return False
return True

if modelscope_flag_set():
repo_id = "AI-ModelScope/MLVU"

cache_path = get_cache_path(repo_id)
if cache_path is not None and check_integrity(cache_path):
dataset_path = cache_path
@@ -326,9 +337,14 @@ def generate_tsv(pth):
data_df = data_df.assign(index=range(len(data_df)))
data_df.to_csv(data_file, sep='\t', index=False)

hf_token = os.environ.get('HUGGINGFACE_TOKEN')
huggingface_hub.login(hf_token)
dataset_path = snapshot_download(repo_id=repo_id, repo_type='dataset')
if modelscope_flag_set():
from modelscope import dataset_snapshot_download
dataset_path = dataset_snapshot_download(dataset_id=repo_id)
else:
hf_token = os.environ.get('HUGGINGFACE_TOKEN')
huggingface_hub.login(hf_token)
dataset_path = snapshot_download(repo_id=repo_id, repo_type='dataset')

generate_tsv(dataset_path)

data_file = osp.join(dataset_path, f'{dataset_name}.tsv')
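
Note the asymmetry in the MLVU hunks above: the Hugging Face path logs in with a `HUGGINGFACE_TOKEN` environment variable, presumably because the source repo requires authentication, while the ModelScope path skips the login entirely. A user on the Hugging Face path would set the token first; the value below is a placeholder:

```python
import os

# Only needed for the Hugging Face download path of MLVU; placeholder value.
os.environ['HUGGINGFACE_TOKEN'] = 'hf_...your-token...'
```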
6 changes: 5 additions & 1 deletion vlmeval/dataset/mmbench_video.py
@@ -81,7 +81,11 @@ def check_integrity(pth):
if cache_path is not None and check_integrity(cache_path):
dataset_path = cache_path
else:
dataset_path = snapshot_download(repo_id=repo_id, repo_type='dataset')
if modelscope_flag_set():
from modelscope import dataset_snapshot_download
dataset_path = dataset_snapshot_download(dataset_id=repo_id)
else:
dataset_path = snapshot_download(repo_id=repo_id, repo_type='dataset')
unwrap_hf_pkl(dataset_path)
self.video_path = osp.join(dataset_path, 'video/')
data_file = osp.join(dataset_path, f'{dataset_name}.tsv')
40 changes: 29 additions & 11 deletions vlmeval/dataset/mvbench.py
@@ -96,6 +96,9 @@ def check_integrity(pth):
return False
return True

if modelscope_flag_set():
repo_id = 'modelscope/MVBench'

cache_path = get_cache_path(repo_id, branch='main')
if cache_path is not None and check_integrity(cache_path):
dataset_path = cache_path
@@ -148,7 +151,6 @@ def generate_tsv(pth):
data_df.to_csv(data_file, sep='\t', index=False)

def move_files(pth):
# special for mvbench/data0613 supplementary data
src_folder = os.path.join(pth, 'video/data0613')
if not os.path.exists(src_folder):
return
@@ -162,11 +164,20 @@ def move_files(pth):
item_path = os.path.join(subsubdir_path, item)
target_folder = os.path.join(pth, 'video', subdir, subsubdir)
if not os.path.exists(target_folder):
shutil.move(item_path, target_folder)

hf_token = os.environ.get('HUGGINGFACE_TOKEN')
huggingface_hub.login(hf_token)
dataset_path = snapshot_download(repo_id=repo_id, repo_type='dataset')
os.makedirs(target_folder)
target_path = os.path.join(target_folder, item)
try:
shutil.move(item_path, target_path)
except Exception as e:
print(f"Error moving {item_path} to {target_path}: {e}")

if modelscope_flag_set():
from modelscope import dataset_snapshot_download
dataset_path = dataset_snapshot_download(dataset_id=repo_id, revision='master')
else:
hf_token = os.environ.get('HUGGINGFACE_TOKEN')
huggingface_hub.login(hf_token)
dataset_path = snapshot_download(repo_id=repo_id, repo_type='dataset')
unzip_hf_zip(dataset_path)
move_files(dataset_path)
generate_tsv(dataset_path)
@@ -423,7 +434,7 @@ def evaluate(self, eval_file, **judge_kwargs):

class MVBench_MP4(VideoBaseDataset):

MP4_MD5 = '7b4608045347904c28c153015a7a2b6b'
MP4_MD5 = '5c8c6f8b7972c2de65a629590f7c42f5'
SYS = """Carefully watch the video and pay attention to the cause and sequence of events, \
the detail and movement of objects, and the action and pose of persons. \
Based on your observations, select the best option that accurately addresses the question.
Expand Down Expand Up @@ -453,13 +464,16 @@ def check_integrity(pth):
return False
return True

if modelscope_flag_set():
repo_id = 'modelscope/MVBench'

cache_path = get_cache_path(repo_id, branch='video')
if cache_path is not None and check_integrity(cache_path):
dataset_path = cache_path
else:
def generate_tsv(pth):
data_file = osp.join(pth, f'{dataset_name}.tsv')
if os.path.exists(data_file) and md5(data_file) == self.MD5:
if os.path.exists(data_file) and md5(data_file) == self.MP4_MD5:
return
json_data_path = os.path.join(dataset_path, 'test.json')
json_data = load(json_data_path)
@@ -479,9 +493,13 @@ def generate_tsv(pth):
data_df = data_df.assign(index=range(len(data_df)))
data_df.to_csv(data_file, sep='\t', index=False)

hf_token = os.environ.get('HUGGINGFACE_TOKEN')
huggingface_hub.login(hf_token)
dataset_path = snapshot_download(repo_id=repo_id, repo_type='dataset', revision='video')
if modelscope_flag_set():
from modelscope import dataset_snapshot_download
dataset_path = dataset_snapshot_download(dataset_id=repo_id, revision='video')
else:
hf_token = os.environ.get('HUGGINGFACE_TOKEN')
huggingface_hub.login(hf_token)
dataset_path = snapshot_download(repo_id=repo_id, repo_type='dataset', revision='video')
generate_tsv(dataset_path)

data_file = osp.join(dataset_path, f'{dataset_name}.tsv')
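
Beyond the download switch, the `move_files` hunk above also hardens the supplementary-data pass: the old code called `shutil.move(item_path, target_folder)` directly when the folder was missing, so the moved item could end up at the folder's own path, while the new code creates the folder, builds an explicit `target_path`, and logs per-item failures instead of aborting. The same per-item logic in isolation; the helper name is illustrative:

```python
import os
import shutil


def move_item(item_path, target_folder):
    # Create the destination folder on demand, then move the item into it,
    # logging failures instead of letting one bad file abort the whole pass.
    if not os.path.exists(target_folder):
        os.makedirs(target_folder)
    target_path = os.path.join(target_folder, os.path.basename(item_path))
    try:
        shutil.move(item_path, target_path)
    except Exception as e:
        print(f"Error moving {item_path} to {target_path}: {e}")
```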
18 changes: 15 additions & 3 deletions vlmeval/dataset/tempcompass.py
@@ -131,7 +131,11 @@ def generate_tsv(pth):
data_df = data_df.assign(index=range(len(data_df)))
data_df.to_csv(data_file, sep='\t', index=False)

dataset_path = snapshot_download(repo_id=repo_id, repo_type='dataset')
if modelscope_flag_set():
from modelscope import dataset_snapshot_download
dataset_path = dataset_snapshot_download(dataset_id=repo_id)
else:
dataset_path = snapshot_download(repo_id=repo_id, repo_type='dataset')
read_parquet(dataset_path)
unzip_videos(dataset_path)
generate_tsv(dataset_path)
@@ -322,7 +326,11 @@ def generate_tsv(pth):
data_df = data_df.assign(index=range(len(data_df)))
data_df.to_csv(data_file, sep='\t', index=False)

dataset_path = snapshot_download(repo_id=repo_id, repo_type='dataset')
if modelscope_flag_set():
from modelscope import dataset_snapshot_download
dataset_path = dataset_snapshot_download(dataset_id=repo_id)
else:
dataset_path = snapshot_download(repo_id=repo_id, repo_type='dataset')
read_parquet(dataset_path)
unzip_videos(dataset_path)
generate_tsv(dataset_path)
@@ -510,7 +518,11 @@ def generate_tsv(pth):
data_df = data_df.assign(index=range(len(data_df)))
data_df.to_csv(data_file, sep='\t', index=False)

dataset_path = snapshot_download(repo_id=repo_id, repo_type='dataset')
if modelscope_flag_set():
from modelscope import dataset_snapshot_download
dataset_path = dataset_snapshot_download(dataset_id=repo_id)
else:
dataset_path = snapshot_download(repo_id=repo_id, repo_type='dataset')
read_parquet(dataset_path)
unzip_videos(dataset_path)
generate_tsv(dataset_path)
2 changes: 1 addition & 1 deletion vlmeval/dataset/video_base.py
@@ -76,7 +76,7 @@ def save_video_frames(self, video, num_frames=8, fps=-1):
indices = [int(i * step_size) for i in range(required_frames)]

# Extract the frames and save them
frame_paths = self.frame_paths_fps(video, len(izhendices), fps)
frame_paths = self.frame_paths_fps(video, len(indices), fps)
flag = np.all([osp.exists(p) for p in frame_paths])
if flag:
return frame_paths
6 changes: 5 additions & 1 deletion vlmeval/dataset/videomme.py
@@ -140,7 +140,11 @@ def generate_tsv(pth):

data_file.to_csv(osp.join(pth, f'{dataset_name}.tsv'), sep='\t', index=False)

dataset_path = snapshot_download(repo_id=repo_id, repo_type='dataset')
if modelscope_flag_set():
from modelscope import dataset_snapshot_download
dataset_path = dataset_snapshot_download(dataset_id=repo_id)
else:
dataset_path = snapshot_download(repo_id=repo_id, repo_type='dataset')
unzip_hf_zip(dataset_path)
generate_tsv(dataset_path)

33 changes: 23 additions & 10 deletions vlmeval/smp/misc.py
@@ -24,6 +24,11 @@
from huggingface_hub.utils._cache_manager import _scan_cached_repo
from sty import fg, bg, ef, rs


def modelscope_flag_set():
return os.environ.get('VLMEVALKIT_USE_MODELSCOPE', None) in ['1', 'True']


def process_punctuation(inText):
import re
outText = inText
@@ -74,16 +79,24 @@ def bincount(lst):

def get_cache_path(repo_id, branch='main', repo_type='datasets'):
try:
from .file import HFCacheRoot
cache_path = HFCacheRoot()
org, repo_name = repo_id.split('/')
repo_path = Path(osp.join(cache_path, f'{repo_type}--{org}--{repo_name}/'))
hf_cache_info = _scan_cached_repo(repo_path=repo_path)
revs = {r.refs: r for r in hf_cache_info.revisions}
if branch is not None:
revs = {refs: r for refs, r in revs.items() if branch in refs}
rev2keep = max(revs.values(), key=lambda r: r.last_modified)
return str(rev2keep.snapshot_path)
if modelscope_flag_set():
from modelscope.hub.file_download import create_temporary_directory_and_cache
if repo_type == 'datasets':
repo_type = 'dataset'
_, cache = create_temporary_directory_and_cache(model_id=repo_id, repo_type=repo_type)
cache_path = cache.get_root_location()
return cache_path
else:
from .file import HFCacheRoot
cache_path = HFCacheRoot()
org, repo_name = repo_id.split('/')
repo_path = Path(osp.join(cache_path, f'{repo_type}--{org}--{repo_name}/'))
hf_cache_info = _scan_cached_repo(repo_path=repo_path)
revs = {r.refs: r for r in hf_cache_info.revisions}
if branch is not None:
revs = {refs: r for refs, r in revs.items() if branch in refs}
rev2keep = max(revs.values(), key=lambda r: r.last_modified)
return str(rev2keep.snapshot_path)
except Exception as e:
import logging
logging.warning(f'{type(e)}: {e}')
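
Finally, `get_cache_path` now resolves the cache root through ModelScope's `create_temporary_directory_and_cache` (note the 'datasets' to 'dataset' repo-type rename) when the flag is set, and falls back to scanning the Hugging Face cache otherwise. A quick usage sketch; the Hugging Face id is an assumption, since only ModelScope mirror ids appear in this diff:

```python
from vlmeval.smp.misc import get_cache_path, modelscope_flag_set

# MVBench as wired up above; 'OpenGVLab/MVBench' is assumed to be the HF source.
repo_id = 'modelscope/MVBench' if modelscope_flag_set() else 'OpenGVLab/MVBench'
cache = get_cache_path(repo_id, branch='main')
if cache is None:
    print('no usable local snapshot; a fresh download will be triggered')
else:
    print(f'reusing snapshot at {cache}')
```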
