forked from avinabsaha/ML-CS60050
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
0 parents
commit 2b59426
Showing
40 changed files
with
8,562 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,92 @@ | ||
# Roll : 15EC10071 | ||
# Name : Avinab Saha | ||
# Assignment Number : 1 | ||
# Flags : python3 15EC10071_1.py <datafile.csv> | ||
|
||
# import necessary package | ||
import sys | ||
|
||
# Each CSV row holds 8 attribute values followed by the class label.
NUM_ATTRIBUTES = 8


def read_examples(path, num_columns=NUM_ATTRIBUTES + 1):
    """Read the training CSV at *path* into a list of rows of string fields.

    Blank lines are skipped, surrounding whitespace is stripped from every
    field, and only the first *num_columns* fields of each row are kept.
    The number of rows is taken from the file itself, not assumed.

    Raises:
        ValueError: if a non-blank row has fewer than *num_columns* fields.
    """
    examples = []
    with open(path) as data_file:
        for line_number, raw_line in enumerate(data_file, start=1):
            stripped = raw_line.strip()
            if not stripped:
                continue
            fields = [field.strip() for field in stripped.split(',')]
            if len(fields) < num_columns:
                raise ValueError(
                    "line %d: expected %d comma-separated values, found %d"
                    % (line_number, num_columns, len(fields)))
            examples.append(fields[:num_columns])
    return examples


def find_s_hypothesis(examples, num_attributes=NUM_ATTRIBUTES):
    """Derive the conjunction shared by all positive examples.

    A row is positive when its field at index *num_attributes* equals '1'.

    Returns:
        A list with one entry per attribute: 1 when every positive example
        has '1' for it (plain literal), -1 when every positive example shares
        some other single value (negated literal), 0 when the positive
        examples disagree (attribute dropped from the conjunction).

    Raises:
        ValueError: if there is no positive example to generalise from.
    """
    positives = [row for row in examples if row[num_attributes] == '1']
    if not positives:
        raise ValueError("no positive examples: cannot learn a conjunction")

    status = []
    for column in range(num_attributes):
        observed = {row[column] for row in positives}
        if len(observed) > 1:
            status.append(0)
        elif observed == {'1'}:
            status.append(1)
        else:
            status.append(-1)
    return status


def format_hypothesis(status):
    """Format *status* as "<count>,<literal>,<literal>,...".

    Literals are 1-based attribute numbers, negative when the attribute is
    negated. The comma after the count is always emitted, so a hypothesis
    with no literal is rendered as "0,".
    """
    literals = [str((index + 1) * sign)
                for index, sign in enumerate(status) if sign != 0]
    return str(len(literals)) + "," + ",".join(literals)


def main(argv):
    """Command-line entry point: learn from the CSV named in argv[1] and print."""
    if len(argv) != 2:
        # sys.exit with a message writes it to stderr and exits with status 1.
        sys.exit("Please use correct format: python3 <code filename> <data filename>")
    try:
        status = find_s_hypothesis(read_examples(argv[1]))
    except ValueError as error:
        sys.exit("Error: %s" % error)
    print(format_hypothesis(status))


if __name__ == "__main__":
    main(sys.argv)
Binary file not shown.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,20 @@ | ||
1,1,1,1,1,1,0,1,1 | ||
1,1,1,1,1,1,0,0,1 | ||
1,1,1,1,1,1,1,1,0 | ||
1,1,1,1,1,0,0,1,1 | ||
1,1,1,1,1,0,0,0,1 | ||
1,1,1,0,1,1,0,1,1 | ||
1,1,0,1,1,1,0,1,0 | ||
1,1,1,0,1,1,0,0,1 | ||
1,1,1,0,1,0,0,1,1 | ||
1,1,1,0,1,0,0,0,1 | ||
0,1,1,1,1,1,0,1,1 | ||
0,1,1,1,1,1,0,0,1 | ||
1,0,1,1,1,1,0,1,0 | ||
0,1,1,1,1,0,0,1,1 | ||
1,1,0,1,0,1,0,1,0 | ||
1,0,0,1,1,1,0,1,0 | ||
1,0,0,1,0,1,1,1,0 | ||
0,1,1,1,1,0,0,0,1 | ||
1,0,1,1,1,1,1,1,0 | ||
0,1,1,0,1,1,0,1,1 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
0 0 1 1 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,151 @@ | ||
# Roll: 15EC10071 | ||
# Name: Avinab Saha | ||
# Assignment number: 2 | ||
# Specific compilation/execution flags: None | ||
|
||
|
||
import numpy as np | ||
|
||
def entropy(labels):
    """Return the Shannon entropy (base 2) of the values in *labels*.

    The relative frequency of every distinct value is computed with
    np.unique; each non-zero frequency p contributes -p * log2(p).
    """
    _, occurrences = np.unique(labels, return_counts=True)
    probabilities = occurrences.astype('float') / len(labels)

    total = 0
    # Zero frequencies are filtered out so log2 is never evaluated at 0.
    for probability in probabilities[probabilities != 0.0]:
        total -= probability * np.log2(probability)
    return total
|
||
def info_gain(attribute_data, labels):
    """Return the information gain of splitting *labels* on *attribute_data*.

    The gain is the entropy of *labels* minus, for every distinct attribute
    value, the entropy of the labels having that value weighted by the
    value's relative frequency. *labels* is indexed with a boolean mask
    built from *attribute_data*.
    """
    distinct_values, occurrences = np.unique(attribute_data, return_counts=True)
    weights = occurrences.astype('float') / len(attribute_data)

    gain = entropy(labels)
    for value, weight in zip(distinct_values, weights):
        matching_labels = labels[attribute_data == value]
        gain -= weight * entropy(matching_labels)
    return gain
|
||
def choose_best_attribute(data, labels, attributes):
    """Return the entry of *attributes* whose column has the highest gain.

    Column k of *data* is scored for attributes[k]. Ties keep the earliest
    attribute because only a strictly larger gain replaces the current
    winner. Returns None when *attributes* is empty.
    """
    winner = None
    highest_gain = -999999
    for column, candidate in enumerate(attributes):
        score = info_gain(data[:, column], labels)
        if score > highest_gain:
            highest_gain, winner = score, candidate
    return winner
|
||
def choose_best_attribute_column(attributes, attribute):
    """Return the position of the first entry of *attributes* equal to *attribute*.

    Returns None when no entry matches.
    """
    positions = (position for position, candidate in enumerate(attributes)
                 if attribute == candidate)
    return next(positions, None)
|
||
def find_child_attribures(attributes, attribute):
    """Return a new list with every entry of *attributes* not equal to *attribute*.

    The relative order of the remaining entries is preserved.
    """
    return [candidate for candidate in attributes if candidate != attribute]
|
||
def get_label(length, label):
    """Return an array of *length* entries, each equal to *label*."""
    unit_vector = np.ones(length)
    return np.multiply(unit_vector, label)
|
||
|
||
class DecisionTree:
    """Decision-tree node; constructing a node builds its whole subtree.

    A node is a leaf when ``children`` is None, in which case ``label``
    holds its prediction. Otherwise ``attribute`` names the attribute it
    splits on and ``children`` holds one subtree per value of that attribute
    seen in the node's training rows.
    """

    def __init__(self, data, labels, attributes, max_level, old_level, value, parent, children):
        """Create a node from the training rows that reach it.

        Args:
            data: 2-D array whose column k holds the values of attributes[k].
            labels: class label of each row of *data* (must be non-empty).
            attributes: identifiers of the attributes still available.
            max_level: stored on the node; it is not enforced as a depth limit.
            old_level: level of the parent node (0 for the root).
            value: value of the parent's split attribute that leads here.
            parent: parent node, or None for the root.
            children: initial child list; None marks the node as a leaf.
        """
        self.level = old_level + 1
        self.max_level = max_level
        self.attribute_value = value
        self.parent = parent
        self.children = children
        # Always defined so either can be inspected on any node.
        self.attribute = None
        self.label = None

        labels = np.asarray(labels)

        # Pure node: every row carries the same label.
        if np.all(labels == labels[0]):
            self.label = labels[0]
            return

        # No attribute left but the labels still disagree (contradictory
        # rows). Splitting further would leave an empty child list and make
        # classify() return 0 for every row, so predict the majority label.
        if len(attributes) == 0:
            self.label = self._majority_label(labels)
            return

        self.build(data, labels, attributes)

    @staticmethod
    def _majority_label(labels):
        """Return the most frequent label; ties resolve to the smallest label."""
        values, counts = np.unique(labels, return_counts=True)
        return values[np.argmax(counts)]

    def build(self, data, labels, attributes):
        """Pick the best attribute and create one child per observed value."""
        self.attribute = choose_best_attribute(data, labels, attributes)
        best_attribute_column = choose_best_attribute_column(attributes, self.attribute)
        attribute_data = data[:, best_attribute_column]
        child_attributes = find_child_attribures(attributes, self.attribute)
        self.children = []
        for val in np.unique(attribute_data):
            rows = attribute_data == val
            # The chosen column is removed so that the child's columns line
            # up with child_attributes again.
            child_data = np.delete(data[rows, :], best_attribute_column, 1)
            child_labels = labels[rows]
            self.children.append(
                DecisionTree(child_data, child_labels, child_attributes,
                             self.max_level, self.level, val, self, None))

    def classify(self, data):
        """Return the predicted label of every row of *data*.

        *data* must keep all of its columns: ``self.attribute`` is used
        directly as a column index, so attribute identifiers have to be the
        column positions of the full data. A 1-D input is treated as a
        single row. Rows whose value for the split attribute matches no
        child keep the default prediction 0.
        """
        if len(data.shape) == 1:
            data = np.reshape(data, (1, len(data)))
        if self.children is None:
            return get_label(len(data), self.label)
        labels = np.zeros(len(data))
        for child in self.children:
            child_attr_index = data[:, self.attribute] == child.attribute_value
            labels[child_attr_index] = child.classify(data[child_attr_index])
        return labels
|
||
# Columns 0-7 of every CSV row are attribute values; column 8 of the
# training file is the class label.
NUM_ATTRIBUTES = 8


def load_csv_matrix(path):
    """Load the comma-separated numeric file at *path* as a 2-D float array.

    ndmin=2 keeps a file with a single row two-dimensional, so row/column
    slicing works for any number of rows.
    """
    return np.loadtxt(path, delimiter=',', ndmin=2)


def main():
    """Train on data2.csv, classify test2.csv and write 15EC10071_2.out."""
    # Reading the Training Data Provided
    training = load_csv_matrix('data2.csv')
    Training_Data = training[:, :NUM_ATTRIBUTES]
    Training_Label = training[:, NUM_ATTRIBUTES]

    # Reading the Test Data Provided
    Test_Data = load_csv_matrix('test2.csv')[:, :NUM_ATTRIBUTES]

    # Attribute identifiers are the column positions of the full data.
    attributes = list(range(Training_Data.shape[1]))
    tree = DecisionTree(Training_Data, Training_Label, attributes, 8, 0, None, None, None)
    predictions = tree.classify(Test_Data)

    # One integer label per test row, each followed by a single space.
    with open('15EC10071_2.out', 'w') as out_file:
        out_file.write("".join(str(int(label)) + " " for label in predictions))


if __name__ == "__main__":
    main()
Binary file not shown.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,24 @@ | ||
1,1,1,1,1,1,0,1,1 | ||
1,1,1,1,1,1,0,0,1 | ||
0,1,1,1,1,1,1,1,0 | ||
1,1,1,1,1,0,0,1,1 | ||
1,1,1,1,1,0,0,0,1 | ||
1,1,1,0,1,1,0,1,1 | ||
0,1,0,1,1,1,0,1,0 | ||
1,1,1,0,1,1,0,0,1 | ||
1,1,1,0,1,0,0,1,1 | ||
1,1,1,0,1,0,0,0,1 | ||
0,1,1,1,1,1,0,1,1 | ||
0,1,1,1,1,1,0,0,1 | ||
0,0,1,1,1,1,0,1,0 | ||
0,1,1,1,1,0,0,1,1 | ||
0,1,0,1,0,1,0,1,0 | ||
0,0,0,1,1,1,0,1,0 | ||
0,0,0,1,0,1,1,1,0 | ||
0,1,1,1,1,0,0,0,1 | ||
0,0,1,1,1,1,1,1,0 | ||
0,1,1,0,1,1,0,1,1 | ||
0,0,1,1,0,1,1,1,0 | ||
0,0,0,1,0,1,1,1,0 | ||
1,1,1,0,1,0,1,1,1 | ||
1,1,0,0,1,0,1,1,1 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,4 @@ | ||
0,1,1,1,1,1,1,1 | ||
1,0,0,0,0,0,0,0 | ||
0,1,1,0,1,0,0,0 | ||
0,1,1,1,1,0,0,0 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
1 1 1 1 1 1 1 1 1 1 2 2 1 2 1 1 1 2 1 2 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,86 @@ | ||
# Name: Avinab Saha | ||
# Roll: 15EC10071 | ||
# Assignment 7 ML CS60050 | ||
# Tested with Python 2.7.12 (default, Nov 20 2017, 18:23:56) [GCC 5.4.0 20160609] on linux2 | ||
|
||
import numpy as np | ||
|
||
def euclidean(P1, P2, w):
    """Return the Euclidean distance between the first *w* coordinates of P1 and P2."""
    squared_total = 0
    for index in range(w):
        delta = P1[index] - P2[index]
        squared_total = squared_total + delta * delta
    return np.sqrt(squared_total)
|
||
def two_means(data, iterations=10):
    """Cluster the rows of *data* into two groups with k-means (k = 2).

    Two distinct rows are drawn at random as the initial centres, using the
    global NumPy random state (call np.random.seed beforehand for
    reproducible results). Every iteration assigns each row to the nearest
    centre (ties go to cluster 1) and then moves each centre to the mean of
    its members.

    Args:
        data: 2-D array-like, one row per point.
        iterations: number of assign/update rounds to run.

    Returns:
        Integer array with one label per row: 1 or 2.

    Raises:
        ValueError: if *data* has fewer than two rows.
    """
    data = np.asarray(data, dtype=float)
    num_points = data.shape[0]
    if num_points < 2:
        raise ValueError("need at least two data points to form two clusters")

    # Sample from every row index (including 0) without replacement, so the
    # two start centres are different rows whatever the size of the file.
    first, second = np.random.choice(num_points, size=2, replace=False)
    centres = [data[first].copy(), data[second].copy()]

    labels = np.zeros(num_points, dtype=int)
    for _ in range(iterations):
        # Assign Labels
        dist_1 = np.sqrt(((data - centres[0]) ** 2).sum(axis=1))
        dist_2 = np.sqrt(((data - centres[1]) ** 2).sum(axis=1))
        labels[dist_1 <= dist_2] = 1
        labels[dist_1 > dist_2] = 2

        # Update Centre of Cluster
        for index, cluster_id in enumerate((1, 2)):
            members = data[labels == cluster_id]
            # An empty cluster keeps its previous centre; averaging zero
            # rows would turn the centre into NaN.
            if len(members) > 0:
                centres[index] = members.mean(axis=0)
    return labels


def main():
    """Cluster the rows of data7.csv and write the labels to 15EC10071_7.out."""
    # ndmin=2 keeps a single-row or single-column file two-dimensional.
    training_data = np.loadtxt('data7.csv', delimiter=',', ndmin=2)
    labels = two_means(training_data)

    # One integer label per row, each followed by a single space.
    with open('15EC10071_7.out', 'w') as out_file:
        out_file.write("".join(str(int(label)) + " " for label in labels))


if __name__ == "__main__":
    main()
Binary file not shown.
Oops, something went wrong.