[Fix] Update SenseChat Vision

Myhs-phz · Dec 11, 2024 · dda5784 · dda5784
1 parent 854caac
commit dda5784
Showing 1 changed file with 13 additions and 9 deletions.
diff --git a/vlmeval/api/sensechat_vision.py b/vlmeval/api/sensechat_vision.py
@@ -40,7 +40,7 @@ def dump_image(self, line, dataset):
         """
         ROOT = LMUDataRoot()
         assert isinstance(dataset, str)
-        img_root = osp.join(ROOT, 'images', img_root_map[dataset] if dataset in img_root_map else dataset)
+        img_root = osp.join(ROOT, 'images', img_root_map(dataset))
         os.makedirs(img_root, exist_ok=True)
         if 'image' in line:
             if isinstance(line['image'], list):
@@ -141,8 +141,8 @@ def build_prompt(self, line, dataset=None):
             for key, item in options.items():
                 question += f'\n{key}. {item}'
             prompt = {
-                'multiple-choice': 'You are an expert in {}. Please solve the university-level {} examination question, which includes interleaved images and text. Your output should be divided into two parts: First, reason about the correct answer. Then write the answer in the following format where X is exactly one of the choices given by the problem: "ANSWER: X". If you are uncertain of the correct answer, guess the most likely one.',  # noqa: E501
-                'open': 'You are an expert in {}. Please solve the university-level {} examination question, which includes interleaved images and text. Your output should be divided into two parts: First, reason about the correct answer. Then write the answer in the following format where X is only the answer and nothing else: "ANSWER: X"'  # noqa: E501
+                'multiple-choice': 'Answer with carefully thought step by step. Apply the thinking process recursively at both macro and micro levels. Verify consistency of reasoning and look for potential flaws or gaps during thinking. When realize mistakes, explain why the previous thinking was incorrect, fix it and then continue thinking.\n\n',  # noqa
+                'open': 'Answer with carefully thought step by step. Apply the thinking process recursively at both macro and micro levels. Verify consistency of reasoning and look for potential flaws or gaps during thinking. When realize mistakes, explain why the previous thinking was incorrect, fix it and then continue thinking.\n\n'  # noqa
             }
             subject = '_'.join(line['id'].split('_')[1:-1])
             prompt = prompt[line['question_type']].format(subject, subject) + '\n' + question
@@ -168,7 +168,7 @@ def generate_inner(self, inputs, **kwargs) -> str:
         inputs = [inputs] if isinstance(inputs, str) else inputs
         dataset = kwargs.get('dataset', None)
 
-        if dataset is not None and listinstr(['ChartQA_TEST'], dataset):
+        if dataset is not None and listinstr(['ChartQA_TEST','MathVista_MINI'], dataset):
             self.max_num = 12
         elif dataset is not None and listinstr(['DocVQA_VAL', 'DocVQA_TEST'], dataset):
             self.max_num = 18
@@ -182,9 +182,11 @@ def generate_inner(self, inputs, **kwargs) -> str:
         elif listinstr(['AI2D_TEST'], dataset):
             self.max_new_tokens = 10
         elif 'MMMU' in dataset:
-            self.max_new_tokens = 1024
+            self.max_new_tokens = 4096  # 1024
         elif 'MMBench' in dataset:
             self.max_new_tokens = 100
+        elif 'MathVista_MINI' in dataset:
+            self.max_new_tokens = 4096
 
         prompt, image = self.message_to_promptimg(message=inputs, dataset=dataset)
 
@@ -212,7 +214,11 @@ def generate_inner(self, inputs, **kwargs) -> str:
 
         data = {
             'messages': message,
-            'max_new_tokens': self.max_new_tokens,
+            'max_new_tokens': self.max_new_tokens,  # 1024
+            'temperature': 0,
+            "top_k": 0,
+            "top_p": 0.99,
+            'repetition_penalty': 1.05,
             'model': self.model,
             'stream': False,
         }
@@ -232,16 +238,14 @@ def generate_inner(self, inputs, **kwargs) -> str:
         try:
             assert response.status_code == 200
             response = response.json()['data']['choices'][0]['message'].strip()
-            if dataset is not None and 'MMMU' in dataset:
-                response = response.split('ANSWER: ')[-1].strip()
             if self.verbose:
                 self.logger.info(f'inputs: {inputs}\nanswer: {response}')
             return 0, response, 'Succeeded! '
         except Exception as err:
             if self.verbose:
                 self.logger.error('---------------------------ERROR---------------------------')
                 self.logger.error(response.json())
-                self.logger.error(f'{type(err)}: {err}')
+                self.logger.error(err)
                 self.logger.error('---------------------------request_id---------------------------' + request_id)
                 self.logger.error(
                     'api error' + response.json()['error']['message']