[Feat] Add ModelScope download for video datasets (open-compass#623)
* add modelscope download

* fix lint

* fix lint

* [Improvement] Update

---------

Co-authored-by: Haodong Duan <[email protected]>
Yunnglin and kennymckormick authored Nov 25, 2024
1 parent 82b631a commit d590830
Showing 8 changed files with 108 additions and 34 deletions.
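
All eight files hinge on the same switch: the new `modelscope_flag_set()` helper (added to `vlmeval/smp/misc.py` in the last hunk below) reads the `VLMEVALKIT_USE_MODELSCOPE` environment variable. A minimal opt-in sketch, assuming a standard `vlmeval` install:

```python
import os

# Opt in before any downloads are triggered; the helper accepts '1' or 'True'.
os.environ['VLMEVALKIT_USE_MODELSCOPE'] = '1'

from vlmeval.smp.misc import modelscope_flag_set

print(modelscope_flag_set())  # True: video datasets will be pulled from ModelScope
```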
9 changes: 8 additions & 1 deletion vlmeval/dataset/longvideobench.py
@@ -121,6 +121,9 @@ def check_integrity(pth):
return False
return True

if modelscope_flag_set():
repo_id = "AI-ModelScope/LongVideoBench"

cache_path = get_cache_path(repo_id)
if cache_path is not None and check_integrity(cache_path):
dataset_path = cache_path
@@ -137,7 +140,11 @@ def generate_tsv(pth):

data_file.to_csv(osp.join(pth, f'{dataset_name}.tsv'), sep='\t', index=False)

snapshot_download(repo_id=repo_id, repo_type='dataset')
if modelscope_flag_set():
from modelscope import dataset_snapshot_download
dataset_snapshot_download(dataset_id=repo_id)
else:
snapshot_download(repo_id=repo_id, repo_type='dataset')
print("All videos are downloaded for LongVideoBench")

if not glob(osp.join(cache_path, "videos")):
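
The hunk above is the template repeated across the other dataset files: swap `repo_id` to the ModelScope mirror when the flag is set, reuse a verified local cache, and otherwise download via ModelScope's `dataset_snapshot_download` or Hugging Face's `snapshot_download`. A condensed sketch of that pattern; the `fetch_dataset` helper and the placeholder Hugging Face id are illustrative, not part of the commit:

```python
from vlmeval.smp.misc import get_cache_path, modelscope_flag_set


def fetch_dataset(hf_repo_id, ms_repo_id, revision=None):
    # Prefer the ModelScope mirror when VLMEVALKIT_USE_MODELSCOPE is set.
    repo_id = ms_repo_id if modelscope_flag_set() else hf_repo_id
    cache_path = get_cache_path(repo_id)
    if cache_path is not None:
        return cache_path  # integrity checks are dataset-specific and omitted here
    kwargs = {} if revision is None else {'revision': revision}
    if modelscope_flag_set():
        from modelscope import dataset_snapshot_download
        return dataset_snapshot_download(dataset_id=repo_id, **kwargs)
    from huggingface_hub import snapshot_download
    return snapshot_download(repo_id=repo_id, repo_type='dataset', **kwargs)


# e.g. LongVideoBench; '<hf-repo-id>' stands in for the id the file already uses
path = fetch_dataset('<hf-repo-id>', 'AI-ModelScope/LongVideoBench')
```

Keeping the flag check inline in each dataset class, rather than in a shared helper like this, lets every dataset pin its own mirror id and revision.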
28 changes: 22 additions & 6 deletions vlmeval/dataset/mlvu.py
@@ -95,6 +95,9 @@ def check_integrity(pth):
return False
return True

if modelscope_flag_set():
repo_id = "AI-ModelScope/MLVU"

cache_path = get_cache_path(repo_id)
if cache_path is not None and check_integrity(cache_path):
dataset_path = cache_path
@@ -123,9 +126,14 @@ def generate_tsv(pth):
data_df = data_df.assign(index=range(len(data_df)))
data_df.to_csv(data_file, sep='\t', index=False)

hf_token = os.environ.get('HUGGINGFACE_TOKEN')
huggingface_hub.login(hf_token)
dataset_path = snapshot_download(repo_id=repo_id, repo_type='dataset')
if modelscope_flag_set():
from modelscope import dataset_snapshot_download
dataset_path = dataset_snapshot_download(dataset_id=repo_id)
else:
hf_token = os.environ.get('HUGGINGFACE_TOKEN')
huggingface_hub.login(hf_token)
dataset_path = snapshot_download(repo_id=repo_id, repo_type='dataset')

generate_tsv(dataset_path)

data_file = osp.join(dataset_path, f'{dataset_name}.tsv')
@@ -298,6 +306,9 @@ def check_integrity(pth):
return False
return True

if modelscope_flag_set():
repo_id = "AI-ModelScope/MLVU"

cache_path = get_cache_path(repo_id)
if cache_path is not None and check_integrity(cache_path):
dataset_path = cache_path
@@ -326,9 +337,14 @@ def generate_tsv(pth):
data_df = data_df.assign(index=range(len(data_df)))
data_df.to_csv(data_file, sep='\t', index=False)

hf_token = os.environ.get('HUGGINGFACE_TOKEN')
huggingface_hub.login(hf_token)
dataset_path = snapshot_download(repo_id=repo_id, repo_type='dataset')
if modelscope_flag_set():
from modelscope import dataset_snapshot_download
dataset_path = dataset_snapshot_download(dataset_id=repo_id)
else:
hf_token = os.environ.get('HUGGINGFACE_TOKEN')
huggingface_hub.login(hf_token)
dataset_path = snapshot_download(repo_id=repo_id, repo_type='dataset')

generate_tsv(dataset_path)

data_file = osp.join(dataset_path, f'{dataset_name}.tsv')
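
Note the asymmetry in the MLVU hunks above: the Hugging Face path logs in with a `HUGGINGFACE_TOKEN` environment variable, presumably because the source repo requires authentication, while the ModelScope path skips the login entirely. A user on the Hugging Face path would set the token first; the value below is a placeholder:

```python
import os

# Only needed for the Hugging Face download path of MLVU; placeholder value.
os.environ['HUGGINGFACE_TOKEN'] = 'hf_...your-token...'
```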
6 changes: 5 additions & 1 deletion vlmeval/dataset/mmbench_video.py
@@ -81,7 +81,11 @@ def check_integrity(pth):
if cache_path is not None and check_integrity(cache_path):
dataset_path = cache_path
else:
dataset_path = snapshot_download(repo_id=repo_id, repo_type='dataset')
if modelscope_flag_set():
from modelscope import dataset_snapshot_download
dataset_path = dataset_snapshot_download(dataset_id=repo_id)
else:
dataset_path = snapshot_download(repo_id=repo_id, repo_type='dataset')
unwrap_hf_pkl(dataset_path)
self.video_path = osp.join(dataset_path, 'video/')
data_file = osp.join(dataset_path, f'{dataset_name}.tsv')
40 changes: 29 additions & 11 deletions vlmeval/dataset/mvbench.py
@@ -96,6 +96,9 @@ def check_integrity(pth):
return False
return True

if modelscope_flag_set():
repo_id = 'modelscope/MVBench'

cache_path = get_cache_path(repo_id, branch='main')
if cache_path is not None and check_integrity(cache_path):
dataset_path = cache_path
@@ -148,7 +151,6 @@ def generate_tsv(pth):
data_df.to_csv(data_file, sep='\t', index=False)

def move_files(pth):
# special for mvbench/data0613 supplementary data
src_folder = os.path.join(pth, 'video/data0613')
if not os.path.exists(src_folder):
return
@@ -162,11 +164,20 @@ def move_files(pth):
item_path = os.path.join(subsubdir_path, item)
target_folder = os.path.join(pth, 'video', subdir, subsubdir)
if not os.path.exists(target_folder):
shutil.move(item_path, target_folder)

hf_token = os.environ.get('HUGGINGFACE_TOKEN')
huggingface_hub.login(hf_token)
dataset_path = snapshot_download(repo_id=repo_id, repo_type='dataset')
os.makedirs(target_folder)
target_path = os.path.join(target_folder, item)
try:
shutil.move(item_path, target_path)
except Exception as e:
print(f"Error moving {item_path} to {target_path}: {e}")

if modelscope_flag_set():
from modelscope import dataset_snapshot_download
dataset_path = dataset_snapshot_download(dataset_id=repo_id, revision='master')
else:
hf_token = os.environ.get('HUGGINGFACE_TOKEN')
huggingface_hub.login(hf_token)
dataset_path = snapshot_download(repo_id=repo_id, repo_type='dataset')
unzip_hf_zip(dataset_path)
move_files(dataset_path)
generate_tsv(dataset_path)
@@ -423,7 +434,7 @@ def evaluate(self, eval_file, **judge_kwargs):

class MVBench_MP4(VideoBaseDataset):

MP4_MD5 = '7b4608045347904c28c153015a7a2b6b'
MP4_MD5 = '5c8c6f8b7972c2de65a629590f7c42f5'
SYS = """Carefully watch the video and pay attention to the cause and sequence of events, \
the detail and movement of objects, and the action and pose of persons. \
Based on your observations, select the best option that accurately addresses the question.
Expand Down Expand Up @@ -453,13 +464,16 @@ def check_integrity(pth):
return False
return True

if modelscope_flag_set():
repo_id = 'modelscope/MVBench'

cache_path = get_cache_path(repo_id, branch='video')
if cache_path is not None and check_integrity(cache_path):
dataset_path = cache_path
else:
def generate_tsv(pth):
data_file = osp.join(pth, f'{dataset_name}.tsv')
if os.path.exists(data_file) and md5(data_file) == self.MD5:
if os.path.exists(data_file) and md5(data_file) == self.MP4_MD5:
return
json_data_path = os.path.join(dataset_path, 'test.json')
json_data = load(json_data_path)
@@ -479,9 +493,13 @@ def generate_tsv(pth):
data_df = data_df.assign(index=range(len(data_df)))
data_df.to_csv(data_file, sep='\t', index=False)

hf_token = os.environ.get('HUGGINGFACE_TOKEN')
huggingface_hub.login(hf_token)
dataset_path = snapshot_download(repo_id=repo_id, repo_type='dataset', revision='video')
if modelscope_flag_set():
from modelscope import dataset_snapshot_download
dataset_path = dataset_snapshot_download(dataset_id=repo_id, revision='video')
else:
hf_token = os.environ.get('HUGGINGFACE_TOKEN')
huggingface_hub.login(hf_token)
dataset_path = snapshot_download(repo_id=repo_id, repo_type='dataset', revision='video')
generate_tsv(dataset_path)

data_file = osp.join(dataset_path, f'{dataset_name}.tsv')
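
Beyond the download switch, the `move_files` hunk above also hardens the supplementary-data pass: the old code called `shutil.move(item_path, target_folder)` directly when the folder was missing, so the moved item could end up at the folder's own path, while the new code creates the folder, builds an explicit `target_path`, and logs per-item failures instead of aborting. The same per-item logic in isolation; the helper name is illustrative:

```python
import os
import shutil


def move_item(item_path, target_folder):
    # Create the destination folder on demand, then move the item into it,
    # logging failures instead of letting one bad file abort the whole pass.
    if not os.path.exists(target_folder):
        os.makedirs(target_folder)
    target_path = os.path.join(target_folder, os.path.basename(item_path))
    try:
        shutil.move(item_path, target_path)
    except Exception as e:
        print(f"Error moving {item_path} to {target_path}: {e}")
```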
18 changes: 15 additions & 3 deletions vlmeval/dataset/tempcompass.py
@@ -131,7 +131,11 @@ def generate_tsv(pth):
data_df = data_df.assign(index=range(len(data_df)))
data_df.to_csv(data_file, sep='\t', index=False)

dataset_path = snapshot_download(repo_id=repo_id, repo_type='dataset')
if modelscope_flag_set():
from modelscope import dataset_snapshot_download
dataset_path = dataset_snapshot_download(dataset_id=repo_id)
else:
dataset_path = snapshot_download(repo_id=repo_id, repo_type='dataset')
read_parquet(dataset_path)
unzip_videos(dataset_path)
generate_tsv(dataset_path)
@@ -322,7 +326,11 @@ def generate_tsv(pth):
data_df = data_df.assign(index=range(len(data_df)))
data_df.to_csv(data_file, sep='\t', index=False)

dataset_path = snapshot_download(repo_id=repo_id, repo_type='dataset')
if modelscope_flag_set():
from modelscope import dataset_snapshot_download
dataset_path = dataset_snapshot_download(dataset_id=repo_id)
else:
dataset_path = snapshot_download(repo_id=repo_id, repo_type='dataset')
read_parquet(dataset_path)
unzip_videos(dataset_path)
generate_tsv(dataset_path)
@@ -510,7 +518,11 @@ def generate_tsv(pth):
data_df = data_df.assign(index=range(len(data_df)))
data_df.to_csv(data_file, sep='\t', index=False)

dataset_path = snapshot_download(repo_id=repo_id, repo_type='dataset')
if modelscope_flag_set():
from modelscope import dataset_snapshot_download
dataset_path = dataset_snapshot_download(dataset_id=repo_id)
else:
dataset_path = snapshot_download(repo_id=repo_id, repo_type='dataset')
read_parquet(dataset_path)
unzip_videos(dataset_path)
generate_tsv(dataset_path)
2 changes: 1 addition & 1 deletion vlmeval/dataset/video_base.py
@@ -76,7 +76,7 @@ def save_video_frames(self, video, num_frames=8, fps=-1):
indices = [int(i * step_size) for i in range(required_frames)]

# Extract the frames and save them
frame_paths = self.frame_paths_fps(video, len(izhendices), fps)
frame_paths = self.frame_paths_fps(video, len(indices), fps)
flag = np.all([osp.exists(p) for p in frame_paths])
if flag:
return frame_paths
6 changes: 5 additions & 1 deletion vlmeval/dataset/videomme.py
@@ -140,7 +140,11 @@ def generate_tsv(pth):

data_file.to_csv(osp.join(pth, f'{dataset_name}.tsv'), sep='\t', index=False)

dataset_path = snapshot_download(repo_id=repo_id, repo_type='dataset')
if modelscope_flag_set():
from modelscope import dataset_snapshot_download
dataset_path = dataset_snapshot_download(dataset_id=repo_id)
else:
dataset_path = snapshot_download(repo_id=repo_id, repo_type='dataset')
unzip_hf_zip(dataset_path)
generate_tsv(dataset_path)

33 changes: 23 additions & 10 deletions vlmeval/smp/misc.py
@@ -24,6 +24,11 @@
from huggingface_hub.utils._cache_manager import _scan_cached_repo
from sty import fg, bg, ef, rs


def modelscope_flag_set():
return os.environ.get('VLMEVALKIT_USE_MODELSCOPE', None) in ['1', 'True']


def process_punctuation(inText):
import re
outText = inText
@@ -74,16 +79,24 @@ def bincount(lst):

def get_cache_path(repo_id, branch='main', repo_type='datasets'):
try:
from .file import HFCacheRoot
cache_path = HFCacheRoot()
org, repo_name = repo_id.split('/')
repo_path = Path(osp.join(cache_path, f'{repo_type}--{org}--{repo_name}/'))
hf_cache_info = _scan_cached_repo(repo_path=repo_path)
revs = {r.refs: r for r in hf_cache_info.revisions}
if branch is not None:
revs = {refs: r for refs, r in revs.items() if branch in refs}
rev2keep = max(revs.values(), key=lambda r: r.last_modified)
return str(rev2keep.snapshot_path)
if modelscope_flag_set():
from modelscope.hub.file_download import create_temporary_directory_and_cache
if repo_type == 'datasets':
repo_type = 'dataset'
_, cache = create_temporary_directory_and_cache(model_id=repo_id, repo_type=repo_type)
cache_path = cache.get_root_location()
return cache_path
else:
from .file import HFCacheRoot
cache_path = HFCacheRoot()
org, repo_name = repo_id.split('/')
repo_path = Path(osp.join(cache_path, f'{repo_type}--{org}--{repo_name}/'))
hf_cache_info = _scan_cached_repo(repo_path=repo_path)
revs = {r.refs: r for r in hf_cache_info.revisions}
if branch is not None:
revs = {refs: r for refs, r in revs.items() if branch in refs}
rev2keep = max(revs.values(), key=lambda r: r.last_modified)
return str(rev2keep.snapshot_path)
except Exception as e:
import logging
logging.warning(f'{type(e)}: {e}')
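
Finally, `get_cache_path` now resolves the cache root through ModelScope's `create_temporary_directory_and_cache` (note the 'datasets' to 'dataset' repo-type rename) when the flag is set, and falls back to scanning the Hugging Face cache otherwise. A quick usage sketch; the Hugging Face id is an assumption, since only ModelScope mirror ids appear in this diff:

```python
from vlmeval.smp.misc import get_cache_path, modelscope_flag_set

# MVBench as wired up above; 'OpenGVLab/MVBench' is assumed to be the HF source.
repo_id = 'modelscope/MVBench' if modelscope_flag_set() else 'OpenGVLab/MVBench'
cache = get_cache_path(repo_id, branch='main')
if cache is None:
    print('no usable local snapshot; a fresh download will be triggered')
else:
    print(f'reusing snapshot at {cache}')
```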
