From 63bc8edf74e90e81d12356c37dda9cf4628a0d83 Mon Sep 17 00:00:00 2001
From: kennymckormick <dhd.efz@gmail.com>
Date: Wed, 8 Jan 2025 03:30:19 +0000
Subject: [PATCH] [Improvement] More Robust Auto Eval Service for MMBench

---
 scripts/mmb_eval_gradio.py | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/scripts/mmb_eval_gradio.py b/scripts/mmb_eval_gradio.py
index 33a9806f4..4b7a127bd 100644
--- a/scripts/mmb_eval_gradio.py
+++ b/scripts/mmb_eval_gradio.py
@@ -1,5 +1,6 @@
 from vlmeval.smp import *
 from vlmeval.tools import EVAL
+from vlmeval.dataset import build_dataset
 import gradio as gr
 
 HEADER = """
@@ -81,6 +82,18 @@ def evaluate(file):
     ret = f"Evaluation ID: {eval_id}\n"
     timestamp = datetime.datetime.now().strftime('%Y.%m.%d  %H:%M:%S')
     ret += f'Evaluation Timestamp: {timestamp}\n'
+    eval_data = load(eval_file)
+    eval_data['index'] = [int(x) for x in eval_data['index']]  # cast to int so membership tests match the int-cast base set
+    base_data = build_dataset(dataset).data
+    base_index_set = {int(x) for x in base_data['index']}
+    inds_more = {k for k in eval_data['index'] if k not in base_index_set}  # submitted indices absent from the base dataset
+    if inds_more:
+        inds_more = {x % 1000000 for x in inds_more}  # integer modulus: `% 1e6` would report the indices as floats (5.0, not 5)
+        ret += f"Warning: The matched dataset is {dataset}. The following indices are not in the base dataset: {inds_more}\n"
+        ret += "We automatically remove those indices, and still recommend you to check the indices in your prediction file.\n"
+        eval_data = eval_data[eval_data['index'].isin(base_index_set)]  # keep only rows whose index exists in the base dataset
+        dump(eval_data, eval_file)  # overwrite the file so the EVAL call below reads the filtered predictions
+
     acc = EVAL(dataset, eval_file)
     nacc = reformat_acc(acc).round(1)
     return ret, nacc