Skip to content

Commit

Permalink
first commit
Browse files Browse the repository at this point in the history
  • Loading branch information
avinabsaha committed Dec 14, 2018
0 parents commit 2b59426
Show file tree
Hide file tree
Showing 40 changed files with 8,562 additions and 0 deletions.
92 changes: 92 additions & 0 deletions Concept Learning/15EC10071_1.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,92 @@
# Roll : 15EC10071
# Name : Avinab Saha
# Assignment Number : 1
# Flags : python3 15EC10071_1.py <datafile.csv>

# import necessary package
import sys

# Find-S style concept learner.
#
# Every CSV row holds binary attribute values followed by the class label in
# the last column.  For each attribute the script reports whether all positive
# examples (label '1') agree on its value: attribute k is printed as  k  when
# they all have '1', as  -k  when they all share some other value, and is
# omitted when they disagree.  The output line is
#     <number of literals>,<literal>,<literal>,...


def read_rows(path):
    """Return the CSV file at *path* as a list of rows of string fields.

    Blank lines are skipped so that a trailing newline at the end of the
    file does not produce a bogus example.  The number of rows and columns
    is taken from the file instead of being fixed in advance.
    """
    rows = []
    with open(path) as handle:
        for raw in handle:
            raw = raw.strip()
            if raw:
                rows.append([field.strip() for field in raw.split(',')])
    return rows


def learn_status(rows):
    """Return one status flag per attribute column.

    The flag is 1 when every positive example has '1' in that column, -1
    when every positive example shares one other value, and 0 when the
    positive examples disagree.  A row is positive when its last field is
    exactly '1'.  With no positive example nothing can be concluded, so
    every flag is 0 (the original code crashed in that case).
    """
    positives = [row[:-1] for row in rows if row[-1] == '1']
    if not positives:
        width = len(rows[0]) - 1 if rows else 0
        return [0] * width

    status = []
    for column in zip(*positives):
        reference = column[0]
        if any(value != reference for value in column):
            status.append(0)
        elif reference == '1':
            status.append(1)
        else:
            status.append(-1)
    return status


def format_result(status):
    """Render the status flags as ``count,lit1,lit2,...``.

    Attributes are numbered from 1; a flag of -1 is written as the negated
    attribute number.  The comma after the count is always present, as in
    the original output, even when there is no literal.
    """
    literals = [
        str(number if flag == 1 else -number)
        for number, flag in enumerate(status, start=1)
        if flag != 0
    ]
    return str(len(literals)) + "," + ",".join(literals)


def main(argv):
    """Check the command line, learn the hypothesis and print it."""
    if len(argv) != 2:
        print("Please use correct format: python3 <code filename> <data filename>")
        sys.exit(0)
    # print() terminates the line in every case, including "0,".
    print(format_result(learn_status(read_rows(argv[1]))))


if __name__ == "__main__":
    main(sys.argv)
Binary file added Concept Learning/assignment1.pdf
Binary file not shown.
20 changes: 20 additions & 0 deletions Concept Learning/data1.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
1,1,1,1,1,1,0,1,1
1,1,1,1,1,1,0,0,1
1,1,1,1,1,1,1,1,0
1,1,1,1,1,0,0,1,1
1,1,1,1,1,0,0,0,1
1,1,1,0,1,1,0,1,1
1,1,0,1,1,1,0,1,0
1,1,1,0,1,1,0,0,1
1,1,1,0,1,0,0,1,1
1,1,1,0,1,0,0,0,1
0,1,1,1,1,1,0,1,1
0,1,1,1,1,1,0,0,1
1,0,1,1,1,1,0,1,0
0,1,1,1,1,0,0,1,1
1,1,0,1,0,1,0,1,0
1,0,0,1,1,1,0,1,0
1,0,0,1,0,1,1,1,0
0,1,1,1,1,0,0,0,1
1,0,1,1,1,1,1,1,0
0,1,1,0,1,1,0,1,1
1 change: 1 addition & 0 deletions Decision Trees/15EC10071_2.out
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
0 0 1 1
151 changes: 151 additions & 0 deletions Decision Trees/15EC10071_2.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,151 @@
# Roll: 15EC10071
# Name: Avinab Saha
# Assignment number: 2
# Specific compilation/execution flags: None


import numpy as np

def entropy(labels):
    """Return the base-2 Shannon entropy of the values found in *labels*.

    The relative frequency of every distinct value is computed with
    ``np.unique`` and the usual ``-p * log2(p)`` terms are accumulated;
    zero frequencies are skipped.
    """
    _, occurrences = np.unique(labels, return_counts=True)
    probabilities = occurrences.astype('float') / len(labels)
    total = 0
    for prob in probabilities[probabilities != 0.0]:
        total -= prob * np.log2(prob)
    return total

def info_gain(attribute_data, labels):
    """Return the information gain of splitting *labels* on *attribute_data*.

    The gain is the entropy of *labels* minus the frequency-weighted entropy
    of the label subsets selected by each distinct attribute value.  *labels*
    is indexed with a boolean mask built from *attribute_data*.
    """
    values, occurrences = np.unique(attribute_data, return_counts=True)
    weights = occurrences.astype('float') / len(attribute_data)
    gain = entropy(labels)
    for value, weight in zip(values, weights):
        subset = labels[attribute_data == value]
        gain -= weight * entropy(subset)
    return gain

def choose_best_attribute(data, labels, attributes):
    """Return the entry of *attributes* whose column has the largest gain.

    Column ``i`` of *data* is scored for ``attributes[i]``.  On equal gains
    the earliest attribute wins because only a strictly larger gain replaces
    the current best.  Returns None when *attributes* is empty.
    """
    winner = None
    winning_gain = -999999
    for column, name in enumerate(attributes):
        gain = info_gain(data[:, column], labels)
        if gain > winning_gain:
            winning_gain, winner = gain, name
    return winner

def choose_best_attribute_column(attributes, attribute):
    """Return the position of the first entry of *attributes* equal to *attribute*.

    Returns None when no entry matches.
    """
    for position, candidate in enumerate(attributes):
        if attribute == candidate:
            return position
    return None

def find_child_attribures(attributes, attribute):
    """Return a new list with every entry of *attributes* that differs from *attribute*.

    The relative order of the remaining entries is preserved.
    """
    return [candidate for candidate in attributes if candidate != attribute]

def get_label(length, label):
    """Return an array of *length* entries, each equal to *label*.

    The array is produced by scaling ``np.ones(length)`` by *label*.
    """
    template = np.ones(length)
    return template * label


class DecisionTree:
    """One node of a decision tree grown by recursive information-gain splits.

    A node is a leaf when ``children`` is None; it then carries ``label``.
    An internal node carries ``attribute`` (the entry of the attribute list it
    splits on, used as a column index into the data given to ``classify``)
    and one child per attribute value seen during training.

    Every node also records ``majority_label``, the most frequent training
    label that reached it (the smallest label wins a tie).  It is used when
    the attributes run out before the labels become pure, and for rows whose
    attribute value was never seen in training; previously both cases
    silently produced class 0.
    """

    def __init__(self, data, labels, attributes, max_level, old_level, value, parent, children):
        """Create the node and grow the subtree below it.

        data        -- 2-D array with one column per entry of *attributes*
        labels      -- class label of every row of *data* (must not be empty)
        attributes  -- identifiers of the columns of *data*, in column order
        max_level   -- stored on the node; the visible code never enforces it
        old_level   -- level of the parent; this node gets ``old_level + 1``
        value       -- parent-attribute value that leads to this node
        parent      -- parent node, or None for the root
        children    -- initial value of ``children``; replaced when the node splits

        Raises ValueError when *labels* is empty.
        """
        labels = np.asarray(labels)
        if labels.size == 0:
            raise ValueError("cannot build a decision tree node from zero examples")

        self.level = old_level + 1
        self.max_level = max_level
        self.attribute_value = value
        self.parent = parent
        self.children = children

        classes, counts = np.unique(labels, return_counts=True)
        self.majority_label = classes[np.argmax(counts)]

        # Stop on a pure node, or when nothing is left to split on; in the
        # latter case the majority label is the best available answer.
        if len(classes) == 1 or len(attributes) == 0:
            self.label = self.majority_label
            return
        self.build(data, labels, attributes)

    def build(self, data, labels, attributes):
        """Split this node on the best attribute and create one child per value."""
        data = np.asarray(data)
        labels = np.asarray(labels)
        self.attribute = choose_best_attribute(data, labels, attributes)
        best_attribute_column = choose_best_attribute_column(attributes, self.attribute)
        attribute_data = data[:, best_attribute_column]
        child_attributes = find_child_attribures(attributes, self.attribute)
        self.children = []
        for val in np.unique(attribute_data):
            selected = attribute_data == val
            # The used column is removed so that the child's data stays
            # aligned with child_attributes.
            child_data = np.delete(data[selected, :], best_attribute_column, 1)
            child_labels = labels[selected]
            self.children.append(DecisionTree(child_data, child_labels, child_attributes,
                                              self.max_level, self.level, val, self, None))

    def classify(self, data):
        """Return a float array with the predicted label of every row of *data*.

        *data* keeps all original columns (``self.attribute`` indexes into
        it directly); a 1-D input is treated as a single row.
        """
        data = np.asarray(data)
        if len(data.shape) == 1:
            data = np.reshape(data, (1, len(data)))
        if self.children is None:
            return np.full(len(data), self.label, dtype=float)
        # Rows that match no child fall back to this node's majority label.
        labels = np.full(len(data), self.majority_label, dtype=float)
        for child in self.children:
            child_attr_index = data[:, self.attribute] == child.attribute_value
            labels[child_attr_index] = child.classify(data[child_attr_index])
        return labels

def read_csv_matrix(path):
    """Return the comma-separated numeric file at *path* as a 2-D float array.

    The file is read once; the number of rows and columns comes from its
    content.  Blank lines are skipped so that a trailing newline is not
    counted as an example.
    """
    rows = []
    with open(path) as handle:
        for raw in handle:
            raw = raw.strip()
            if raw:
                rows.append([float(field) for field in raw.split(',')])
    return np.array(rows)


def main():
    """Train on data2.csv, classify test2.csv and write 15EC10071_2.out."""
    # Training file: attribute columns followed by the class label.
    training = read_csv_matrix('data2.csv')
    Training_Data = training[:, :-1]
    Training_Label = training[:, -1]
    attribute_count = Training_Data.shape[1]

    # Test file: attribute columns only (extra columns are ignored).
    test = read_csv_matrix('test2.csv')

    attributes = list(range(attribute_count))
    tree = DecisionTree(Training_Data, Training_Label, attributes, 8, 0, None, None, None)
    y = tree.classify(test[:, :attribute_count]) if len(test) else []

    # Predictions are written on one line, each followed by a single space.
    with open('15EC10071_2.out', 'w') as file:
        file.write("".join(str(int(x)) + " " for x in y))


if __name__ == "__main__":
    main()
Binary file added Decision Trees/assignment2.pdf
Binary file not shown.
24 changes: 24 additions & 0 deletions Decision Trees/data2.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
1,1,1,1,1,1,0,1,1
1,1,1,1,1,1,0,0,1
0,1,1,1,1,1,1,1,0
1,1,1,1,1,0,0,1,1
1,1,1,1,1,0,0,0,1
1,1,1,0,1,1,0,1,1
0,1,0,1,1,1,0,1,0
1,1,1,0,1,1,0,0,1
1,1,1,0,1,0,0,1,1
1,1,1,0,1,0,0,0,1
0,1,1,1,1,1,0,1,1
0,1,1,1,1,1,0,0,1
0,0,1,1,1,1,0,1,0
0,1,1,1,1,0,0,1,1
0,1,0,1,0,1,0,1,0
0,0,0,1,1,1,0,1,0
0,0,0,1,0,1,1,1,0
0,1,1,1,1,0,0,0,1
0,0,1,1,1,1,1,1,0
0,1,1,0,1,1,0,1,1
0,0,1,1,0,1,1,1,0
0,0,0,1,0,1,1,1,0
1,1,1,0,1,0,1,1,1
1,1,0,0,1,0,1,1,1
4 changes: 4 additions & 0 deletions Decision Trees/test2.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
0,1,1,1,1,1,1,1
1,0,0,0,0,0,0,0
0,1,1,0,1,0,0,0
0,1,1,1,1,0,0,0
1 change: 1 addition & 0 deletions K Means Clustering/15EC10071_7.out
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
1 1 1 1 1 1 1 1 1 1 2 2 1 2 1 1 1 2 1 2
86 changes: 86 additions & 0 deletions K Means Clustering/15EC10071_7.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
# Name: Avinab Saha
# Roll: 15EC10071
# Assignment 7 ML CS60050
# Tested with Python 2.7.12 (default, Nov 20 2017, 18:23:56) [GCC 5.4.0 20160609] on linux2

import numpy as np

def euclidean(P1, P2, w):
    """Return the Euclidean distance between the first *w* coordinates of P1 and P2."""
    squared_gaps = ((P1[k] - P2[k]) * (P1[k] - P2[k]) for k in range(w))
    return np.sqrt(sum(squared_gaps))

def load_matrix(path):
    """Return the comma-separated numeric file at *path* as a 2-D float array.

    The file is read once and its dimensions come from its content.  Blank
    lines are skipped, so a trailing newline no longer corrupts the
    attribute count.
    """
    rows = []
    with open(path) as handle:
        for raw in handle:
            raw = raw.strip()
            if raw:
                rows.append([float(field) for field in raw.split(',')])
    return np.array(rows)


def pick_initial_centres(count):
    """Return two distinct row indices drawn at random from ``range(count)``.

    Every row, including row 0, can be selected.  *count* must be at least 2;
    ``np.random.choice`` raises ValueError otherwise.
    """
    first, second = np.random.choice(count, 2, replace=False)
    return int(first), int(second)


def assign_labels(data, centre_1, centre_2):
    """Label each row 1 or 2 according to the nearer centre (ties go to 1)."""
    width = data.shape[1]
    labels = np.zeros(len(data), dtype=int)
    for i in range(len(data)):
        d1 = euclidean(centre_1, data[i, :], width)
        d2 = euclidean(centre_2, data[i, :], width)
        labels[i] = 1 if d1 <= d2 else 2
    return labels


def update_centre(data, labels, cluster, previous):
    """Return the mean of the rows labelled *cluster*.

    When the cluster has no member *previous* is returned unchanged, which
    avoids the 0/0 division that would turn the centre into NaN.
    """
    members = data[labels == cluster]
    if len(members) == 0:
        return previous
    return members.mean(axis=0)


def main():
    """Cluster data7.csv into two groups and write 15EC10071_7.out."""
    Training_Data = load_matrix('data7.csv')
    h = len(Training_Data)

    # Set a seed here (np.random.seed) to get the same clustering every run.
    one, two = pick_initial_centres(h)
    C1 = Training_Data[one, :]
    C2 = Training_Data[two, :]

    Label = np.zeros(h, dtype=int)
    for loop in range(10):
        # Assign labels, then move each centre to the mean of its cluster.
        Label = assign_labels(Training_Data, C1, C2)
        C1 = update_centre(Training_Data, Label, 1, C1)
        C2 = update_centre(Training_Data, Label, 2, C2)

    # Labels are written on one line, each followed by a single space.
    with open('15EC10071_7.out', 'w') as file:
        file.write("".join(str(int(x)) + " " for x in Label))


if __name__ == "__main__":
    main()
Binary file added K Means Clustering/assignment7.pdf
Binary file not shown.
Loading

0 comments on commit 2b59426

Please sign in to comment.