Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Indraneil Split #41

Merged
merged 9 commits into from
May 10, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
The table of contents is too big for display.
Diff view
Diff view
  •  
  •  
  •  
2 changes: 2 additions & 0 deletions RECORD.md
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@
- Wen-Ding Li
- Ming Xu
- Zhihan Zhang
- Indraneil Paul

## Round 2: Manual Code Refinement
- Terry Yue Zhuo
Expand All @@ -45,6 +46,7 @@
- Dmitry Abulkhanov
- Wen-Ding Li
- Wenhao Yu
- Indraneil Paul

## Round 3: Data Quality Check
- Terry Yue Zhuo
Expand Down
90 changes: 90 additions & 0 deletions data/clean/f_227_indraneil.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
import random
import seaborn as sns
import numpy as np
from matplotlib import pyplot as plt

def f_227(length, range_limit=100, seed=0):
    """
    Draw a seeded list of random integers, sort it, and plot its distribution as a
    seaborn histogram with default settings. Return the axes and the sorted list.

    Both the `random` module and numpy's global generator are seeded with `seed`
    before any numbers are drawn, and the histogram is drawn on a newly created figure.

    Parameters:
    length (int): How many random numbers to generate.
    range_limit (int, Optional): Upper bound (inclusive) of the generated integers; the
                                 lower bound is 1. Defaults to 100. Must be greater than 1.
    seed (int, Optional): Seed applied to the random number generators. Defaults to 0.

    Returns:
    Tuple[matplotlib.axes.Axes, List[int]]: The axes holding the histogram and the sorted
                                            list of random numbers.

    Requirements:
    - random
    - matplotlib.pyplot
    - seaborn
    - numpy

    Raises:
    ValueError: If range_limit is less than or equal to 1.

    Example:
    >>> import matplotlib.pyplot as plt
    >>> ax, data = f_227(1000, 100, 24)  # 1000 random numbers between 1 and 100
    >>> isinstance(ax, plt.Axes)
    True
    """
    if range_limit <= 1:
        raise ValueError("range_limit must be greater than 1")

    # Seed both generators up front so the sample and the plot are reproducible.
    random.seed(seed)
    np.random.seed(seed)

    samples = sorted(random.randint(1, range_limit) for _ in range(length))

    # Open a new figure so the histogram does not land on a pre-existing one.
    plt.figure()
    histogram = sns.histplot(samples, kde=False)

    return histogram.axes, samples


import unittest
import doctest


class TestCases(unittest.TestCase):
    """Tests for f_227: output length, validation, seeded values and histogram counts."""

    def test_case_1(self):
        # With default range and seed the returned list has the requested length.
        _, numbers = f_227(1000)
        self.assertEqual(len(numbers), 1000)

    def test_case_2(self):
        # A range_limit that is not greater than 1 must be rejected.
        with self.assertRaises(ValueError):
            f_227(1000, -3, 42)

    def test_case_3(self):
        # A fixed seed yields a known sorted sample that stays inside [1, 75].
        expected = [1, 4, 15, 19, 23, 25, 25, 26, 31, 31, 33, 36, 38, 42, 61, 64, 65, 65, 72, 72]
        _, numbers = f_227(20, 75, 77)
        self.assertEqual(numbers, expected)
        self.assertTrue(all(1 <= value <= 75 for value in numbers))

    def test_case_4(self):
        # The values stored in the first container of the axes must match the expected counts.
        axes, _ = f_227(1000, 75)
        expected_counts = np.array([98, 103, 106, 73, 87, 92, 94, 84, 90, 95, 78])
        self.assertTrue((axes.containers[0].datavalues == expected_counts).all())

    def test_case_5(self):
        # Two calls with the same seed return identical data.
        first_run = f_227(1000, seed=42)[1]
        second_run = f_227(1000, seed=42)[1]
        self.assertEqual(first_run, second_run)


def run_tests():
    """Collect every test method of TestCases into a suite and run it with a text runner."""
    suite = unittest.TestSuite()
    # unittest.makeSuite was deprecated in Python 3.11 and removed in 3.13;
    # the loader method below is the supported equivalent.
    suite.addTest(unittest.defaultTestLoader.loadTestsFromTestCase(TestCases))
    runner = unittest.TextTestRunner()
    runner.run(suite)


if __name__ == "__main__":
    # Check the docstring examples first, then run the unit tests.
    doctest.testmod()
    run_tests()
90 changes: 90 additions & 0 deletions data/clean/f_228_indraneil.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
import heapq
import math
import matplotlib.pyplot as plt


def f_228(l1, l2, N=10):
    """
    Pick the N positions where 'l1' and 'l2' differ the most, square each of those
    differences, take the square root, and plot the resulting values on a new Axes.

    Parameters:
    l1 (list): A list of numbers.
    l2 (list): A list of numbers, compared position by position with 'l1'.
    N (int): Number of largest differences to consider. Default is 10.

    Returns:
    matplotlib.axes.Axes: The Axes object on which the differences were plotted,
                          largest difference first.

    Requirements:
    - heapq
    - math
    - matplotlib.pyplot

    Example:
    >>> l1 = [99, 86, 90, 70, 86, 95, 56, 98, 80, 81]
    >>> l2 = [21, 11, 21, 1, 26, 40, 4, 50, 34, 37]
    >>> ax = f_228(l1, l2)
    >>> type(ax)
    <class 'matplotlib.axes._axes.Axes'>
    """
    def gap(position):
        # Absolute difference between the two lists at one position.
        return abs(l1[position] - l2[position])

    # Positions are indexed over l1, ranked by how far apart the two lists are there.
    top_positions = heapq.nlargest(N, range(len(l1)), key=gap)

    magnitudes = []
    for position in top_positions:
        delta = l1[position] - l2[position]
        magnitudes.append(math.sqrt(delta ** 2))

    _, ax = plt.subplots()
    ax.plot(magnitudes)

    return ax


import unittest
import doctest


class TestCases(unittest.TestCase):
    """Tests for f_228: the returned object is an Axes whose first line has the expected length."""

    def _check_plot(self, ax, expected_points):
        # Shared assertions: correct return type and number of plotted values.
        self.assertIsInstance(ax, plt.Axes)
        self.assertEqual(len(ax.lines[0].get_ydata()), expected_points)

    def test_case_1(self):
        # Ten pairs with the default N of 10: all ten differences are plotted.
        first = [99, 86, 90, 70, 86, 95, 56, 98, 80, 81]
        second = [21, 11, 21, 1, 26, 40, 4, 50, 34, 37]
        self._check_plot(f_228(first, second), 10)

    def test_case_2(self):
        # An explicit N smaller than the list length limits the plotted values.
        first = [10, 20, 30, 40, 50]
        second = [1, 2, 3, 4, 5]
        self._check_plot(f_228(first, second, 3), 3)

    def test_case_3(self):
        # Fewer pairs than the default N: every pair is plotted.
        first = [0, 10, 20, 30, 40, 50]
        second = [0, 0, 0, 0, 0, 0]
        self._check_plot(f_228(first, second), 6)

    def test_case_4(self):
        # Mirrored lists: differences of both signs are included.
        first = [1, 2, 3, 4, 5]
        second = [5, 4, 3, 2, 1]
        self._check_plot(f_228(first, second), 5)

    def test_case_5(self):
        # Identical all-zero lists still produce one value per position.
        first = [0, 0, 0, 0, 0]
        second = [0, 0, 0, 0, 0]
        self._check_plot(f_228(first, second), 5)


def run_tests():
    """Collect every test method of TestCases into a suite and run it with a text runner."""
    suite = unittest.TestSuite()
    # unittest.makeSuite was deprecated in Python 3.11 and removed in 3.13;
    # the loader method below is the supported equivalent.
    suite.addTest(unittest.defaultTestLoader.loadTestsFromTestCase(TestCases))
    runner = unittest.TextTestRunner()
    runner.run(suite)


if __name__ == "__main__":
    # Check the docstring examples first, then run the unit tests.
    doctest.testmod()
    run_tests()
90 changes: 90 additions & 0 deletions data/clean/f_229_indraneil.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
import re
import json
from collections import Counter


def f_229(json_str, top_n=10):
    """
    Extract all URLs from a string-serialized JSON document using a specific URL pattern and
    return a dict with the URLs as keys and the number of times they appear as values.

    The parsed document is walked recursively through nested dicts and lists. A string value
    counts as a URL only when the pattern matches at the very start of the string, so a value
    such as "qwerthttps://www.example.com" is ignored.

    Parameters:
    json_str (str): The JSON string.
    top_n (int, Optional): The number of URLs to return. Defaults to 10.

    Returns:
    dict: A dict with URLs as keys and the number of times they appear as values. When more
          than top_n URL occurrences are found, only the top_n most common URLs are kept.

    Raises:
    json.JSONDecodeError: If json_str is not valid JSON.

    Requirements:
    - re
    - json
    - collections.Counter

    Example:
    >>> f_229('{"name": "John", "website": "https://www.example.com"}')
    {'https://www.example.com': 1}
    """
    # Compile once; the pattern is applied to every string found in the document.
    pattern = re.compile(r'(https?:\/\/(?:www\.|(?!www))[a-zA-Z0-9][a-zA-Z0-9-]+[a-zA-Z0-9]\.[^\s]{2,}|www\.[a-zA-Z0-9][a-zA-Z0-9-]+[a-zA-Z0-9]\.[^\s]{2,}|https?:\/\/(?:www\.|(?!www))[a-zA-Z0-9]+\.[^\s]{2,}|www\.[a-zA-Z0-9]+\.[^\s]{2,})')
    data = json.loads(json_str)
    urls = []

    def extract(node):
        # Descend into containers; JSON arrays are walked too, so URLs stored in
        # lists (or in dicts nested inside lists) are not silently skipped.
        if isinstance(node, dict):
            for value in node.values():
                extract(value)
        elif isinstance(node, list):
            for item in node:
                extract(item)
        elif isinstance(node, str) and pattern.match(node):
            urls.append(node)

    extract(data)
    if not urls:
        return {}

    counts = Counter(urls)
    # With few enough occurrences, keep every URL in first-seen order.
    if len(urls) <= top_n:
        return dict(counts)

    return dict(counts.most_common(top_n))


import unittest
import doctest


class TestCases(unittest.TestCase):
    """Tests for f_229: URL matching, counting, top_n limiting and invalid JSON."""

    def test_case_1(self):
        # A URL preceded by junk characters does not match at the start of the value.
        payload = '{"name": "John", "website": "qwerthttps://www.example.com"}'
        self.assertEqual(f_229(payload), {})

    def test_case_2(self):
        # Nested dicts are searched, duplicates are counted, and top_n trims the result.
        payload = '{"name": "John", "social": {"twitter": "https://twitter.com/john", "linkedin": "https://linkedin.com/in/john"}, "website": "https://linkedin.com/in/john"}'
        all_counts = f_229(payload)
        self.assertEqual(all_counts, {'https://twitter.com/john': 1, 'https://linkedin.com/in/john': 2})
        top_one = f_229(payload, 1)
        self.assertEqual(top_one, {'https://linkedin.com/in/john': 2})

    def test_case_3(self):
        # Input that is not JSON at all surfaces the decoder's error.
        payload = 'This is an adversarial input 0061'
        with self.assertRaises(json.decoder.JSONDecodeError):
            f_229(payload)

    def test_case_4(self):
        # A document without any URL yields an empty dict.
        payload = '{"name": "John", "age": 30}'
        self.assertEqual(f_229(payload), {})

    def test_case_5(self):
        # A bare domain is rejected while a www-prefixed one is accepted.
        payload = '{"name": "John", "website": "example.com", "blog": "www.johnblog.com"}'
        self.assertEqual(f_229(payload), {'www.johnblog.com': 1})


def run_tests():
    """Collect every test method of TestCases into a suite and run it with a text runner."""
    suite = unittest.TestSuite()
    # unittest.makeSuite was deprecated in Python 3.11 and removed in 3.13;
    # the loader method below is the supported equivalent.
    suite.addTest(unittest.defaultTestLoader.loadTestsFromTestCase(TestCases))
    runner = unittest.TextTestRunner()
    runner.run(suite)


if __name__ == "__main__":
    # Check the docstring examples first, then run the unit tests.
    doctest.testmod()
    run_tests()
105 changes: 105 additions & 0 deletions data/clean/f_230_indraneil.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,105 @@
import numpy as np
from collections import Counter
import matplotlib.pyplot as plt

def f_230(L):
    """
    Analyze an "L" list by calculating the mean, median, mode, and standard deviation.
    Visualize the data by returning a histogram plot.

    The histogram is drawn on a newly created figure, so repeated calls do not pile
    their bars onto the same axes.

    Parameters:
    L (list): Input list.

    Returns:
    dict: A dictionary with the keys 'mean', 'median', 'mode' and 'std_dev' holding the
          statistics of 'L', plus 'plot' holding the matplotlib Axes of the histogram.
          When several values tie for most frequent, the mode is the one that appears
          first in 'L'.

    Raises:
    IndexError: If 'L' is empty, because there is no most common element to pick.

    Requirements:
    - numpy
    - collections.Counter
    - matplotlib.pyplot

    Example:
    >>> L = [1, 2, 3, 4, 5, 6, 7, 8, 9]
    >>> stats = f_230(L)
    >>> print(stats["mean"])
    5.0
    >>> print(stats["median"])
    5.0
    >>> print(stats["mode"])
    1
    """
    mean = np.mean(L)
    median = np.median(L)
    mode = Counter(L).most_common(1)[0][0]
    std_dev = np.std(L)

    # Use a dedicated figure/axes instead of pyplot's implicit current axes;
    # otherwise each call would overlay its histogram on the previous one.
    _, ax = plt.subplots()
    ax.hist(L, bins='auto')
    ax.set_title('Histogram of Data')
    ax.set_xlabel('Value')
    ax.set_ylabel('Frequency')

    return {'mean': mean, 'median': median, 'mode': mode, 'std_dev': std_dev, 'plot': ax}


import unittest
import doctest


class TestCases(unittest.TestCase):
    """Tests for f_230: the four statistics and the type of the returned plot."""

    def _check_stats(self, stats, mean, median, mode, std_dev):
        # Shared assertions, in the same order for every scenario.
        self.assertAlmostEqual(stats['mean'], mean)
        self.assertAlmostEqual(stats['median'], median)
        self.assertEqual(stats['mode'], mode)
        self.assertAlmostEqual(stats['std_dev'], std_dev)
        self.assertIsInstance(stats['plot'], plt.Axes)

    def test_case_1(self):
        # All values unique: the mode is the first element.
        values = [1, 2, 3, 4, 5, 6, 7, 8, 9]
        self._check_stats(f_230(values), np.mean(values), np.median(values), 1, np.std(values))

    def test_case_2(self):
        # A constant list has zero spread.
        values = [5, 5, 5, 5, 5]
        self._check_stats(f_230(values), 5.0, 5.0, 5, 0.0)

    def test_case_3(self):
        # A clear most-frequent value (8 appears three times).
        values = [1, 2, 3, 4, 5, 5, 6, 7, 8, 8, 8, 9]
        self._check_stats(f_230(values), np.mean(values), np.median(values), 8, np.std(values))

    def test_case_4(self):
        # Larger, evenly spaced values without repeats.
        values = [10, 20, 30, 40, 50, 60, 70, 80, 90, 100]
        self._check_stats(f_230(values), np.mean(values), np.median(values), 10, np.std(values))

    def test_case_5(self):
        # A single-element list.
        values = [5]
        self._check_stats(f_230(values), 5.0, 5.0, 5, 0.0)


def run_tests():
    """Collect every test method of TestCases into a suite and run it with a text runner."""
    suite = unittest.TestSuite()
    # unittest.makeSuite was deprecated in Python 3.11 and removed in 3.13;
    # the loader method below is the supported equivalent.
    suite.addTest(unittest.defaultTestLoader.loadTestsFromTestCase(TestCases))
    runner = unittest.TextTestRunner()
    runner.run(suite)


if __name__ == "__main__":
    # Check the docstring examples first, then run the unit tests.
    doctest.testmod()
    run_tests()
Loading
Loading