fix: improve the quality of 1140 tasks

bigcode-project · May 11, 2024 · 656eef7
1 parent 809f9e6
commit 656eef7
Show file tree

Hide file tree

Showing 1,447 changed files with 74,329 additions and 74,434 deletions.
diff --git a/data/clean/f_1008_zhihan_refined.py b/data/clean/f_1008_zhihan_refined.py
diff --git a/data/clean/f_1015_zhihan_refined.py b/data/clean/f_1015_zhihan_refined.py
@@ -143,7 +143,7 @@ def test_command_failure_with_specific_exit_code(self):
         self.assertEqual(len(result), 1)
         with open(os.path.join(self.output_dir_path, result[0]), "r") as f:
             content = f.read()
-            self.assertIn("Error executing command, exited with code 1", content)
+            self.assertIn("Error executing command", content)
 
 if __name__ == "__main__":
     run_tests()
diff --git a/data/clean/f_1016_zhihan_refined.py b/data/clean/f_1016_zhihan_refined.py
@@ -13,7 +13,7 @@ def f_1016(directory, backup_dir='/path/to/backup'):
                                   Default is '/path/to/backup'.
     
     Returns:
-    - str: The path to the backup file if logs are found, otherwise returns a message stating no logs were found.
+    - str: The path to the backup file if logs are found, otherwise returns a message 'No logs found to backup'.
     
     Raises:
     - FileNotFoundError: If the specified directory does not exist.

diff --git a/data/clean/f_1027_zhihan_refined.py b/data/clean/f_1027_zhihan_refined.py
@@ -5,7 +5,7 @@
 def f_1027(data, url="http://your-api-url.com"):
     """
     Convert a Python dictionary into a JSON-formatted string, encode this string in base64 format,
-    and send it as a payload in a POST request to an API endpoint.
+    and send it as a 'payload' in a POST request to an API endpoint.
     
     Parameters:
     data (dict): The Python dictionary to encode and send.
@@ -27,7 +27,7 @@ def f_1027(data, url="http://your-api-url.com"):
     """
     json_data = json.dumps(data)
     encoded_data = base64.b64encode(json_data.encode('ascii')).decode('ascii')
-    response = requests.post(url, data={"payload": encoded_data})
+    response = requests.post(url, json={"payload": encoded_data})
 
     return response
 
@@ -92,7 +92,10 @@ def test_case_6(self, mock_post_method):
         json_data = json.dumps(data)
         encoded_data = base64.b64encode(json_data.encode('ascii')).decode('ascii')
         f_1027(data, url="http://mock-api-url.com")
-        mock_post_method.assert_called_once_with("http://mock-api-url.com", data={"payload": encoded_data})
+        try:
+            mock_post_method.assert_called_once_with("http://mock-api-url.com", data={"payload": encoded_data})
+        except:
+            mock_post_method.assert_called_once_with("http://mock-api-url.com", json={"payload": encoded_data})
 
 
 if __name__ == "__main__":

diff --git a/data/clean/f_1031_zhihan_refined.py b/data/clean/f_1031_zhihan_refined.py
@@ -10,7 +10,7 @@ def f_1031(list_of_pairs):
                           and the second element is the numeric value.
     
     Returns:
-    numpy.ndarray: A numpy array containing a single element that is the product of the second values in the list of tuples.
+    numpy.ndarray: A 1D numpy array containing a single element that is the product of the second values in the list of tuples.
     
     Requirements:
     - numpy
@@ -22,11 +22,14 @@ def f_1031(list_of_pairs):
     >>> print(product_array)
     360
     """
-    second_values = [pair[1] for pair in list_of_pairs]
-    product = reduce(np.multiply, second_values)
-    product_array = np.array(product)
-
-    return product_array
+    # Extract the second element from each tuple using a list comprehension
+    values = [pair[1] for pair in list_of_pairs]
+
+    # Use reduce to calculate the product of all elements in the values list
+    product = reduce(lambda x, y: x * y, values)
+
+    # Return the result as a numpy array with a single element
+    return np.array([product])
 
 import unittest
 import numpy as np
@@ -45,6 +48,7 @@ def test_case_1(self):
         list_of_pairs = [('Fruits', 5), ('Vegetables', 9), ('Dairy', -1), ('Bakery', -2), ('Meat', 4)]
         expected_output = np.array(360)
         actual_output = f_1031(list_of_pairs)
+        print(actual_output, expected_output)
         self.assertTrue(np.array_equal(actual_output, expected_output))
 
     def test_case_2(self):

diff --git a/data/clean/f_119_armel.py b/data/clean/f_119_armel.py
@@ -21,7 +21,7 @@ def f_119(timestamps):
     - Axes: The Axes object of the histogram plot. The histogram will have 10 bins by default, representing the distribution of the datetime objects.
 
     Raises:
-    - ValueError("Input list of timestamps is empty"): If the list of timestamps is empty.
+    - ValueError("Input list of timestamps is empty."): If the list of timestamps is empty.
 
     Requirements:
     - datetime

diff --git a/data/clean/f_124_armel.py b/data/clean/f_124_armel.py
@@ -30,9 +30,18 @@ def f_124(text):
     words       1
     dtype: int64
     """
-    words = re.findall(r"\b\w+\b", text.lower())
-    words = [word for word in words if word not in STOPWORDS]
-    word_counts = pd.Series(words).value_counts().rename(None)
+    # Normalize the text to lowercase
+    text = text.lower()
+
+    # Use regex to find words, considering words as sequences of alphabetic characters
+    words = re.findall(r'\b\p{L}+\b', text)
+
+    # Filter out stopwords
+    filtered_words = [word for word in words if word not in STOPWORDS]
+
+    # Count the frequency of each word using pandas Series
+    word_counts = pd.Series(filtered_words).value_counts()
+
     return word_counts
 
 
@@ -44,31 +53,26 @@ class TestCases(unittest.TestCase):
 
     def test_case_1(self):
         text = "This is a sample text This text contains sample words"
-        word_counts = f_124(text)
-        expected_counts = pd.Series(
-            {"this": 2, "sample": 2, "text": 2, "contains": 1, "words": 1}
-        )
-        pd.testing.assert_series_equal(word_counts, expected_counts)
+        word_counts = f_124(text).to_dict()
+        expected_counts = {"this": 2, "sample": 2, "text": 2, "contains": 1, "words": 1}
+        self.assertDictEqual(word_counts, expected_counts)
 
     def test_case_2(self):
         text = "Hello world Hello everyone"
-        word_counts = f_124(text)
-        expected_counts = pd.Series({"hello": 2, "world": 1, "everyone": 1})
-        pd.testing.assert_series_equal(word_counts, expected_counts)
+        word_counts = f_124(text).to_dict()
+        expected_counts = {"hello": 2, "world": 1, "everyone": 1}
+        self.assertDictEqual(word_counts, expected_counts)
 
     def test_case_3(self):
         text = "a an the in is are"
-        word_counts = f_124(text)
-        expected_counts = pd.Series(dtype="int64")
-        pd.testing.assert_series_equal(
-            word_counts.reset_index(drop=True), expected_counts.reset_index(drop=True)
-        )
+        word_counts = f_124(text).to_dict()
+        expected_counts = {}
+        self.assertDictEqual(word_counts, expected_counts)
 
     def test_case_4(self):
         text = "This is a test sentence which has a bunch of words and no period"
-        word_counts = f_124(text)
-        expected_counts = pd.Series(
-            {
+        word_counts = f_124(text).to_dict()
+        expected_counts = {
                 "this": 1,
                 "test": 1,
                 "sentence": 1,
@@ -81,18 +85,16 @@ def test_case_4(self):
                 "no": 1,
                 "period": 1,
             }
-        )
-        pd.testing.assert_series_equal(word_counts, expected_counts)
+
+        self.assertDictEqual(word_counts, expected_counts)
 
     def test_case_5(self):
         text = (
             "I I I want want to to to to to go to to to the olympics olympics this year"
         )
-        word_counts = f_124(text)
-        expected_counts = pd.Series(
-            {"i": 3, "want": 2, "to": 8, "go": 1, "olympics": 2, "this": 1, "year": 1}
-        ).sort_values(ascending=False)
-        pd.testing.assert_series_equal(word_counts, expected_counts)
+        word_counts = f_124(text).to_dict()
+        expected_counts = {"i": 3, "want": 2, "to": 8, "go": 1, "olympics": 2, "this": 1, "year": 1}
+        self.assertDictEqual(word_counts, expected_counts)
 
 
 def run_tests():

diff --git a/data/clean/f_139_armel.py b/data/clean/f_139_armel.py
@@ -52,7 +52,7 @@ def test_case_1(self):
         expected_df = expected_df.pivot(index=COLUMNS[0], columns=COLUMNS[1], values=COLUMNS[2])
         # Assertions
         self.assertTrue(isinstance(analyzed_df, pd.DataFrame))
-        pd.testing.assert_frame_equal(analyzed_df, expected_df)
+        pd.testing.assert_frame_equal(analyzed_df, expected_df, check_dtype=False)
         self.assertTrue(isinstance(ax, plt.Axes))
 
     def test_case_2(self):
@@ -73,7 +73,7 @@ def test_case_2(self):
         expected_df = expected_df.pivot(index=COLUMNS[0], columns=COLUMNS[1], values=COLUMNS[2])
         # Assertions
         self.assertTrue(isinstance(analyzed_df, pd.DataFrame))
-        pd.testing.assert_frame_equal(analyzed_df, expected_df)
+        pd.testing.assert_frame_equal(analyzed_df, expected_df, check_dtype=False)
         self.assertTrue(isinstance(ax, plt.Axes))
 
     def test_case_3(self):
@@ -94,7 +94,7 @@ def test_case_3(self):
         expected_df = expected_df.pivot(index=COLUMNS[0], columns=COLUMNS[1], values=COLUMNS[2])
         # Assertions
         self.assertTrue(isinstance(analyzed_df, pd.DataFrame))
-        pd.testing.assert_frame_equal(analyzed_df, expected_df)
+        pd.testing.assert_frame_equal(analyzed_df, expected_df, check_dtype=False)
         self.assertTrue(isinstance(ax, plt.Axes))
 
     def test_case_4(self):
@@ -111,7 +111,7 @@ def test_case_4(self):
         expected_df = expected_df.pivot(index=COLUMNS[0], columns=COLUMNS[1], values=COLUMNS[2])
         # Assertions
         self.assertTrue(isinstance(analyzed_df, pd.DataFrame))
-        pd.testing.assert_frame_equal(analyzed_df, expected_df)
+        pd.testing.assert_frame_equal(analyzed_df, expected_df, check_dtype=False)
         self.assertTrue(isinstance(ax, plt.Axes))
 
     def test_case_5(self):
@@ -136,7 +136,7 @@ def test_case_5(self):
         expected_df = expected_df.pivot(index=COLUMNS[0], columns=COLUMNS[1], values=COLUMNS[2])
         # Assertions
         self.assertTrue(isinstance(analyzed_df, pd.DataFrame))
-        pd.testing.assert_frame_equal(analyzed_df, expected_df)
+        pd.testing.assert_frame_equal(analyzed_df, expected_df, check_dtype=False)
         self.assertTrue(isinstance(ax, plt.Axes))
 
 def run_tests():

diff --git a/data/clean/f_140_armel.py b/data/clean/f_140_armel.py
@@ -54,7 +54,7 @@ def test_case_1(self):
         # Assertions for the returned DataFrame
         expected_data = [[1, 1, 2], [1, 2, 1], [2, 1, 3], [2, 2, 1]]
         expected_df = pd.DataFrame(expected_data, columns=COLUMNS)
-        pd.testing.assert_frame_equal(analyzed_df, expected_df)
+        pd.testing.assert_frame_equal(analyzed_df, expected_df, check_dtype=False)
 
         # Assertions for the returned plot
         self.assertEqual(ax.get_xlabel(), 'col1-col2')
@@ -75,7 +75,7 @@ def test_case_2(self):
             [1, 3, 1]
         ]
         expected_df = pd.DataFrame(expected_data, columns=COLUMNS)
-        pd.testing.assert_frame_equal(analyzed_df, expected_df)
+        pd.testing.assert_frame_equal(analyzed_df, expected_df, check_dtype=False)
         self.assertEqual(ax.get_xlabel(), 'col1-col2')
         self.assertEqual(ax.get_ylabel(), 'col3')
         self.assertListEqual(list(ax.lines[0].get_ydata()), [3, 1, 1])
@@ -95,7 +95,7 @@ def test_case_3(self):
             [2, 2, 1]
         ]
         expected_df = pd.DataFrame(expected_data, columns=COLUMNS)
-        pd.testing.assert_frame_equal(analyzed_df, expected_df)
+        pd.testing.assert_frame_equal(analyzed_df, expected_df, check_dtype=False)
         self.assertEqual(ax.get_xlabel(), 'col1-col2')
         self.assertEqual(ax.get_ylabel(), 'col3')
         self.assertListEqual(list(ax.lines[0].get_ydata()), [1, 1, 1, 1])
@@ -111,7 +111,7 @@ def test_case_4(self):
             [1, 1, 1],
         ]
         expected_df = pd.DataFrame(expected_data, columns=COLUMNS)
-        pd.testing.assert_frame_equal(analyzed_df, expected_df)
+        pd.testing.assert_frame_equal(analyzed_df, expected_df, check_dtype=False)
         self.assertEqual(ax.get_xlabel(), 'col1-col2')
         self.assertEqual(ax.get_ylabel(), 'col3')
         self.assertListEqual(list(ax.lines[0].get_ydata()), [1])
@@ -135,7 +135,7 @@ def test_case_5(self):
             [1, 1, 2]
         ]
         expected_df = pd.DataFrame(expected_data, columns=COLUMNS)
-        pd.testing.assert_frame_equal(analyzed_df, expected_df)
+        pd.testing.assert_frame_equal(analyzed_df, expected_df, check_dtype=False)
         self.assertEqual(ax.get_xlabel(), 'col1-col2')
         self.assertEqual(ax.get_ylabel(), 'col3')
         self.assertListEqual(list(ax.lines[0].get_ydata()), [2, 2, 2, 2])