Skip to content

Commit

Permalink
fix: improve the quality of 1140 tasks
Browse files (browse the repository at this point in the history)
  • Loading branch information
terryyz committed May 11, 2024
1 parent 809f9e6 commit 656eef7
Show file tree
Hide file tree
Showing 1,447 changed files with 74,329 additions and 74,434 deletions.
88 changes: 0 additions & 88 deletions data/clean/f_1008_zhihan_refined.py

This file was deleted.

2 changes: 1 addition & 1 deletion data/clean/f_1015_zhihan_refined.py
Original file line number Diff line number Diff line change
Expand Up @@ -143,7 +143,7 @@ def test_command_failure_with_specific_exit_code(self):
self.assertEqual(len(result), 1)
with open(os.path.join(self.output_dir_path, result[0]), "r") as f:
content = f.read()
self.assertIn("Error executing command, exited with code 1", content)
self.assertIn("Error executing command", content)

if __name__ == "__main__":
run_tests()
2 changes: 1 addition & 1 deletion data/clean/f_1016_zhihan_refined.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ def f_1016(directory, backup_dir='/path/to/backup'):
Default is '/path/to/backup'.
Returns:
- str: The path to the backup file if logs are found, otherwise returns a message stating no logs were found.
- str: The path to the backup file if logs are found, otherwise returns a message 'No logs found to backup'.
Raises:
- FileNotFoundError: If the specified directory does not exist.
Expand Down
9 changes: 6 additions & 3 deletions data/clean/f_1027_zhihan_refined.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
def f_1027(data, url="http://your-api-url.com"):
"""
Convert a Python dictionary into a JSON-formatted string, encode this string in base64 format,
and send it as a payload in a POST request to an API endpoint.
and send it as a 'payload' in a POST request to an API endpoint.
Parameters:
data (dict): The Python dictionary to encode and send.
Expand All @@ -27,7 +27,7 @@ def f_1027(data, url="http://your-api-url.com"):
"""
json_data = json.dumps(data)
encoded_data = base64.b64encode(json_data.encode('ascii')).decode('ascii')
response = requests.post(url, data={"payload": encoded_data})
response = requests.post(url, json={"payload": encoded_data})

return response

Expand Down Expand Up @@ -92,7 +92,10 @@ def test_case_6(self, mock_post_method):
json_data = json.dumps(data)
encoded_data = base64.b64encode(json_data.encode('ascii')).decode('ascii')
f_1027(data, url="http://mock-api-url.com")
mock_post_method.assert_called_once_with("http://mock-api-url.com", data={"payload": encoded_data})
try:
mock_post_method.assert_called_once_with("http://mock-api-url.com", data={"payload": encoded_data})
except:
mock_post_method.assert_called_once_with("http://mock-api-url.com", json={"payload": encoded_data})


if __name__ == "__main__":
Expand Down
16 changes: 10 additions & 6 deletions data/clean/f_1031_zhihan_refined.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ def f_1031(list_of_pairs):
and the second element is the numeric value.
Returns:
numpy.ndarray: A numpy array containing a single element that is the product of the second values in the list of tuples.
numpy.ndarray: A 1D numpy array containing a single element that is the product of the second values in the list of tuples.
Requirements:
- numpy
Expand All @@ -22,11 +22,14 @@ def f_1031(list_of_pairs):
>>> print(product_array)
360
"""
second_values = [pair[1] for pair in list_of_pairs]
product = reduce(np.multiply, second_values)
product_array = np.array(product)

return product_array
# Extract the second element from each tuple using a list comprehension
values = [pair[1] for pair in list_of_pairs]

# Use reduce to calculate the product of all elements in the values list
product = reduce(lambda x, y: x * y, values)

# Return the result as a numpy array with a single element
return np.array([product])

import unittest
import numpy as np
Expand All @@ -45,6 +48,7 @@ def test_case_1(self):
list_of_pairs = [('Fruits', 5), ('Vegetables', 9), ('Dairy', -1), ('Bakery', -2), ('Meat', 4)]
expected_output = np.array(360)
actual_output = f_1031(list_of_pairs)
print(actual_output, expected_output)
self.assertTrue(np.array_equal(actual_output, expected_output))

def test_case_2(self):
Expand Down
2 changes: 1 addition & 1 deletion data/clean/f_119_armel.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ def f_119(timestamps):
- Axes: The Axes object of the histogram plot. The histogram will have 10 bins by default, representing the distribution of the datetime objects.
Raises:
- ValueError("Input list of timestamps is empty"): If the list of timestamps is empty.
- ValueError("Input list of timestamps is empty."): If the list of timestamps is empty.
Requirements:
- datetime
Expand Down
54 changes: 28 additions & 26 deletions data/clean/f_124_armel.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,9 +30,18 @@ def f_124(text):
words 1
dtype: int64
"""
words = re.findall(r"\b\w+\b", text.lower())
words = [word for word in words if word not in STOPWORDS]
word_counts = pd.Series(words).value_counts().rename(None)
# Normalize the text to lowercase
text = text.lower()

# Use regex to find words, considering words as sequences of alphabetic characters
words = re.findall(r'\b\p{L}+\b', text)

# Filter out stopwords
filtered_words = [word for word in words if word not in STOPWORDS]

# Count the frequency of each word using pandas Series
word_counts = pd.Series(filtered_words).value_counts()

return word_counts


Expand All @@ -44,31 +53,26 @@ class TestCases(unittest.TestCase):

def test_case_1(self):
text = "This is a sample text This text contains sample words"
word_counts = f_124(text)
expected_counts = pd.Series(
{"this": 2, "sample": 2, "text": 2, "contains": 1, "words": 1}
)
pd.testing.assert_series_equal(word_counts, expected_counts)
word_counts = f_124(text).to_dict()
expected_counts = {"this": 2, "sample": 2, "text": 2, "contains": 1, "words": 1}
self.assertDictEqual(word_counts, expected_counts)

def test_case_2(self):
text = "Hello world Hello everyone"
word_counts = f_124(text)
expected_counts = pd.Series({"hello": 2, "world": 1, "everyone": 1})
pd.testing.assert_series_equal(word_counts, expected_counts)
word_counts = f_124(text).to_dict()
expected_counts = {"hello": 2, "world": 1, "everyone": 1}
self.assertDictEqual(word_counts, expected_counts)

def test_case_3(self):
text = "a an the in is are"
word_counts = f_124(text)
expected_counts = pd.Series(dtype="int64")
pd.testing.assert_series_equal(
word_counts.reset_index(drop=True), expected_counts.reset_index(drop=True)
)
word_counts = f_124(text).to_dict()
expected_counts = {}
self.assertDictEqual(word_counts, expected_counts)

def test_case_4(self):
text = "This is a test sentence which has a bunch of words and no period"
word_counts = f_124(text)
expected_counts = pd.Series(
{
word_counts = f_124(text).to_dict()
expected_counts = {
"this": 1,
"test": 1,
"sentence": 1,
Expand All @@ -81,18 +85,16 @@ def test_case_4(self):
"no": 1,
"period": 1,
}
)
pd.testing.assert_series_equal(word_counts, expected_counts)

self.assertDictEqual(word_counts, expected_counts)

def test_case_5(self):
text = (
"I I I want want to to to to to go to to to the olympics olympics this year"
)
word_counts = f_124(text)
expected_counts = pd.Series(
{"i": 3, "want": 2, "to": 8, "go": 1, "olympics": 2, "this": 1, "year": 1}
).sort_values(ascending=False)
pd.testing.assert_series_equal(word_counts, expected_counts)
word_counts = f_124(text).to_dict()
expected_counts = {"i": 3, "want": 2, "to": 8, "go": 1, "olympics": 2, "this": 1, "year": 1}
self.assertDictEqual(word_counts, expected_counts)


def run_tests():
Expand Down
10 changes: 5 additions & 5 deletions data/clean/f_139_armel.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ def test_case_1(self):
expected_df = expected_df.pivot(index=COLUMNS[0], columns=COLUMNS[1], values=COLUMNS[2])
# Assertions
self.assertTrue(isinstance(analyzed_df, pd.DataFrame))
pd.testing.assert_frame_equal(analyzed_df, expected_df)
pd.testing.assert_frame_equal(analyzed_df, expected_df, check_dtype=False)
self.assertTrue(isinstance(ax, plt.Axes))

def test_case_2(self):
Expand All @@ -73,7 +73,7 @@ def test_case_2(self):
expected_df = expected_df.pivot(index=COLUMNS[0], columns=COLUMNS[1], values=COLUMNS[2])
# Assertions
self.assertTrue(isinstance(analyzed_df, pd.DataFrame))
pd.testing.assert_frame_equal(analyzed_df, expected_df)
pd.testing.assert_frame_equal(analyzed_df, expected_df, check_dtype=False)
self.assertTrue(isinstance(ax, plt.Axes))

def test_case_3(self):
Expand All @@ -94,7 +94,7 @@ def test_case_3(self):
expected_df = expected_df.pivot(index=COLUMNS[0], columns=COLUMNS[1], values=COLUMNS[2])
# Assertions
self.assertTrue(isinstance(analyzed_df, pd.DataFrame))
pd.testing.assert_frame_equal(analyzed_df, expected_df)
pd.testing.assert_frame_equal(analyzed_df, expected_df, check_dtype=False)
self.assertTrue(isinstance(ax, plt.Axes))

def test_case_4(self):
Expand All @@ -111,7 +111,7 @@ def test_case_4(self):
expected_df = expected_df.pivot(index=COLUMNS[0], columns=COLUMNS[1], values=COLUMNS[2])
# Assertions
self.assertTrue(isinstance(analyzed_df, pd.DataFrame))
pd.testing.assert_frame_equal(analyzed_df, expected_df)
pd.testing.assert_frame_equal(analyzed_df, expected_df, check_dtype=False)
self.assertTrue(isinstance(ax, plt.Axes))

def test_case_5(self):
Expand All @@ -136,7 +136,7 @@ def test_case_5(self):
expected_df = expected_df.pivot(index=COLUMNS[0], columns=COLUMNS[1], values=COLUMNS[2])
# Assertions
self.assertTrue(isinstance(analyzed_df, pd.DataFrame))
pd.testing.assert_frame_equal(analyzed_df, expected_df)
pd.testing.assert_frame_equal(analyzed_df, expected_df, check_dtype=False)
self.assertTrue(isinstance(ax, plt.Axes))

def run_tests():
Expand Down
10 changes: 5 additions & 5 deletions data/clean/f_140_armel.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ def test_case_1(self):
# Assertions for the returned DataFrame
expected_data = [[1, 1, 2], [1, 2, 1], [2, 1, 3], [2, 2, 1]]
expected_df = pd.DataFrame(expected_data, columns=COLUMNS)
pd.testing.assert_frame_equal(analyzed_df, expected_df)
pd.testing.assert_frame_equal(analyzed_df, expected_df, check_dtype=False)

# Assertions for the returned plot
self.assertEqual(ax.get_xlabel(), 'col1-col2')
Expand All @@ -75,7 +75,7 @@ def test_case_2(self):
[1, 3, 1]
]
expected_df = pd.DataFrame(expected_data, columns=COLUMNS)
pd.testing.assert_frame_equal(analyzed_df, expected_df)
pd.testing.assert_frame_equal(analyzed_df, expected_df, check_dtype=False)
self.assertEqual(ax.get_xlabel(), 'col1-col2')
self.assertEqual(ax.get_ylabel(), 'col3')
self.assertListEqual(list(ax.lines[0].get_ydata()), [3, 1, 1])
Expand All @@ -95,7 +95,7 @@ def test_case_3(self):
[2, 2, 1]
]
expected_df = pd.DataFrame(expected_data, columns=COLUMNS)
pd.testing.assert_frame_equal(analyzed_df, expected_df)
pd.testing.assert_frame_equal(analyzed_df, expected_df, check_dtype=False)
self.assertEqual(ax.get_xlabel(), 'col1-col2')
self.assertEqual(ax.get_ylabel(), 'col3')
self.assertListEqual(list(ax.lines[0].get_ydata()), [1, 1, 1, 1])
Expand All @@ -111,7 +111,7 @@ def test_case_4(self):
[1, 1, 1],
]
expected_df = pd.DataFrame(expected_data, columns=COLUMNS)
pd.testing.assert_frame_equal(analyzed_df, expected_df)
pd.testing.assert_frame_equal(analyzed_df, expected_df, check_dtype=False)
self.assertEqual(ax.get_xlabel(), 'col1-col2')
self.assertEqual(ax.get_ylabel(), 'col3')
self.assertListEqual(list(ax.lines[0].get_ydata()), [1])
Expand All @@ -135,7 +135,7 @@ def test_case_5(self):
[1, 1, 2]
]
expected_df = pd.DataFrame(expected_data, columns=COLUMNS)
pd.testing.assert_frame_equal(analyzed_df, expected_df)
pd.testing.assert_frame_equal(analyzed_df, expected_df, check_dtype=False)
self.assertEqual(ax.get_xlabel(), 'col1-col2')
self.assertEqual(ax.get_ylabel(), 'col3')
self.assertListEqual(list(ax.lines[0].get_ydata()), [2, 2, 2, 2])
Expand Down
Loading

0 comments on commit 656eef7

Please sign in to comment.