-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathkNearestNeighbors.py
31 lines (28 loc) · 941 Bytes
/
kNearestNeighbors.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
from movies import movie_dataset, movie_labels
def distance(movie1, movie2):
squared_difference = 0
for i in range(len(movie1)):
squared_difference += (movie1[i] - movie2[i]) ** 2
final_distance = squared_difference ** 0.5
return final_distance
def classify(unknown, dataset, labels, k):
distances = []
#Looping through all points in the dataset
for title in dataset:
movie = dataset[title]
distance_to_point = distance(movie, unknown)
#Adding the distance and point associated with that distance
distances.append([distance_to_point, title])
distances.sort()
#Taking only the k closest points
neighbors = distances[0:k]
num_good = 0
num_bad = 0
for neighbor in neighbors:
title = neighbor[1]
if labels[title] == 0:
num_bad += 1
elif labels[title] == 1:
num_good += 1
return 1 if num_good > num_bad else 0
print(classify([.4, .2, .9], movie_dataset, movie_labels, 5))