Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Ml linear regression 24 #853

Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
50 changes: 50 additions & 0 deletions algorithms/ml/linear_regression.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
import math
import statistics
import random


def simple_linear_regression(data):
    """Fit the line y = b_0 + b_1*x to ``data`` by ordinary least squares.

    The coefficients that minimise the residual sum of squares are
    computed in closed form; nothing is initialised randomly and no
    cross validation is performed:

        b_1 = sum((x - mean_x) * (y - mean_y)) / sum((x - mean_x) ** 2)
        b_0 = mean_y - b_1 * mean_x

    Args:
        data: two equally long sequences; ``data[0]`` holds the x values
            and ``data[1]`` the corresponding y values.

    Returns:
        Tuple ``(b_0, b_1, rse, r_2)``: the intercept, the slope, and the
        values returned by ``RSE_calculator`` and ``R_2`` for this fit.

    Raises:
        ValueError: if the x and y sequences differ in length.
        statistics.StatisticsError: if the sequences are empty.
        ZeroDivisionError: if every x value is identical, so the slope
            denominator is zero.
    """
    xs, ys = data[0], data[1]
    # Each x needs exactly one y: with a longer y sequence the means and
    # the sums below would be taken over different sets of points.
    if len(xs) != len(ys):
        raise ValueError("x and y must have the same number of values")

    mean_x = statistics.mean(xs)
    mean_y = statistics.mean(ys)

    numerator = 0    # sum of (x - mean_x) * (y - mean_y)
    denominator = 0  # sum of (x - mean_x) ** 2
    for x, y in zip(xs, ys):
        dx = x - mean_x
        numerator += dx * (y - mean_y)
        denominator += dx ** 2

    param_1 = numerator / denominator
    param_0 = mean_y - param_1 * mean_x
    rss = RSS_calculator([param_0, param_1], data)
    rse = RSE_calculator(rss, len(xs))
    r_2 = R_2(rss, mean_y, data)

    return (param_0, param_1, rse, r_2)


def RSS_calculator(params, data):
    """Return the residual sum of squares of a line over ``data``.

    ``params`` is ``[b_0, b_1]`` (intercept, slope); ``data[0]`` holds the
    x values and ``data[1]`` the observed y values.  The result is the sum
    of ``(y - (b_0 + b_1*x)) ** 2`` over every position in ``data[0]``.
    """
    total = 0
    for position, x_value in enumerate(data[0]):
        observed = data[1][position]
        predicted = params[0] + params[1] * x_value
        total += (observed - predicted) ** 2
    return total


def RSE_calculator(rss, n):
    """Return ``sqrt(rss / (n - 2))`` for residual sum of squares ``rss``.

    ``n`` is the number of observations; two is subtracted from it before
    dividing.
    """
    degrees_of_freedom = n - 2
    mean_squared_residual = rss / degrees_of_freedom
    return math.sqrt(mean_squared_residual)


def R_2(rss, mean_y, data):
    """Return ``1 - rss / tss`` for the y values in ``data[1]``.

    ``tss`` is the total sum of squares: the sum of squared deviations of
    each y value from ``mean_y``.
    """
    total_sum_of_squares = 0
    for observed in data[1]:
        total_sum_of_squares += (observed - mean_y) ** 2
    return 1 - (rss / total_sum_of_squares)
19 changes: 18 additions & 1 deletion tests/test_ml.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,10 @@
from algorithms.ml.nearest_neighbor import (
distance,
nearest_neighbor
nearest_neighbor,
)

from algorithms.ml.linear_regression import (
simple_linear_regression
)

import unittest
Expand All @@ -12,6 +16,10 @@ def setUp(self):
self.trainSetAND = {(0, 0): 0, (0, 1): 0, (1, 0): 0, (1, 1): 1}

# train set for light or dark colors

self.data_set_linear_reg = [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9],
[1, 3, 2, 5, 7, 8, 8, 9, 10, 12]]

self.trainSetLight = {(11, 98, 237): 'L', (3, 39, 96): 'D',
(242, 226, 12): 'L', (99, 93, 4): 'D',
(232, 62, 32): 'L', (119, 28, 11): 'D',
Expand All @@ -25,6 +33,7 @@ def test_nearest_neighbor(self):
self.assertEqual(nearest_neighbor((0, 1), self.trainSetAND), 0)

# dark/light color test

self.assertEqual(nearest_neighbor((31, 242, 164),
self.trainSetLight), 'L')
self.assertEqual(nearest_neighbor((13, 94, 64),
Expand All @@ -35,6 +44,14 @@ def test_nearest_neighbor(self):
def test_distance(self):
    """Check ``distance`` on one fixed pair of 3-tuples to two places."""
    measured = distance((1, 2, 3), (1, 0, -1))
    self.assertAlmostEqual(measured, 4.47, 2)

def test_linear_regression(self):
    """Fit the linear-regression fixture and compare with stored values."""
    fitted = simple_linear_regression(self.data_set_linear_reg)
    intercept, slope, std_error, r_squared = fitted

    self.assertAlmostEqual(intercept, 1.2363636363636363)
    self.assertAlmostEqual(slope, 1.1696969696969697)
    self.assertAlmostEqual(std_error, 0.8384690232980003)
    self.assertAlmostEqual(r_squared, 0.952538038613988)

if __name__ == "__main__":
unittest.main()