first commit

singla-excelsior · Dec 14, 2018 · 2b59426 · 2b59426
commit 2b59426
Show file tree

Hide file tree

Showing 40 changed files with 8,562 additions and 0 deletions.
diff --git a/Concept Learning/15EC10071_1.py b/Concept Learning/15EC10071_1.py
@@ -0,0 +1,92 @@
+# Roll               : 15EC10071
+# Name               : Avinab Saha
+# Assignment Number  : 1
+# Flags              : python3 15EC10071_1.py <datafile.csv>
+
+# import necessary package
+import sys
+
+# Layout of every data row: 8 attribute columns followed by the class label.
+NUM_ATTRIBUTES = 8
+LABEL_INDEX = NUM_ATTRIBUTES
+
+
+def load_rows(path):
+    """Read the comma-separated data file at *path*.
+
+    Blank lines are skipped and every field is stripped of surrounding
+    whitespace. Any number of rows is accepted.
+
+    Returns:
+        A list of rows; each row is a list of field strings.
+
+    Raises:
+        ValueError: if a non-blank line has fewer than 9 fields.
+    """
+    rows = []
+    with open(path) as handle:
+        for line_number, raw_line in enumerate(handle, start=1):
+            raw_line = raw_line.strip()
+            if not raw_line:
+                # Tolerate blank lines, e.g. an extra newline at end of file.
+                continue
+            fields = [field.strip() for field in raw_line.split(',')]
+            if len(fields) <= LABEL_INDEX:
+                raise ValueError(
+                    "line %d: expected at least %d comma-separated fields, got %d"
+                    % (line_number, LABEL_INDEX + 1, len(fields))
+                )
+            rows.append(fields)
+    return rows
+
+
+def find_s(rows):
+    """Compute the status of each attribute over the positive examples.
+
+    Only rows whose label field equals '1' are considered. For every
+    attribute the status is:
+
+        1   the attribute is '1' in every positive row,
+        -1  the attribute has one single other value in every positive row,
+        0   the attribute takes different values across the positive rows.
+
+    Returns:
+        A list of NUM_ATTRIBUTES integers, each one of 1, -1 or 0.
+
+    Raises:
+        ValueError: if *rows* contains no positive example.
+    """
+    positives = [row[:NUM_ATTRIBUTES] for row in rows if row[LABEL_INDEX] == '1']
+    if not positives:
+        raise ValueError("the data contains no positive (label 1) example")
+
+    status = []
+    for column in zip(*positives):
+        if len(set(column)) != 1:
+            status.append(0)
+        elif column[0] == '1':
+            status.append(1)
+        else:
+            status.append(-1)
+    return status
+
+
+def format_hypothesis(status):
+    """Render *status* as ``<count>,<literal>,<literal>,...``.
+
+    A literal is the 1-based attribute number, negated when the status of
+    that attribute is -1. Attributes with status 0 are omitted. The count
+    is always followed by a comma, even when there are no literals.
+    """
+    literals = [
+        str((index + 1) * sign) for index, sign in enumerate(status) if sign != 0
+    ]
+    return str(len(literals)) + "," + ",".join(literals)
+
+
+def main(argv):
+    """Run the script: read the data file named in *argv* and print the result."""
+    if len(argv) != 2:
+        print("Please use correct format: python3 <code filename> <data filename>")
+        sys.exit(0)
+
+    try:
+        status = find_s(load_rows(argv[1]))
+    except ValueError as error:
+        # sys.exit with a string prints it to stderr and exits with status 1.
+        sys.exit("Error: %s" % error)
+
+    # print() always terminates the line, also when no attribute is constrained.
+    print(format_hypothesis(status))
+
+
+if __name__ == "__main__":
+    main(sys.argv)
diff --git a/Concept Learning/assignment1.pdf b/Concept Learning/assignment1.pdf
diff --git a/Concept Learning/data1.csv b/Concept Learning/data1.csv
@@ -0,0 +1,20 @@
+1,1,1,1,1,1,0,1,1
+1,1,1,1,1,1,0,0,1
+1,1,1,1,1,1,1,1,0
+1,1,1,1,1,0,0,1,1
+1,1,1,1,1,0,0,0,1
+1,1,1,0,1,1,0,1,1
+1,1,0,1,1,1,0,1,0
+1,1,1,0,1,1,0,0,1
+1,1,1,0,1,0,0,1,1
+1,1,1,0,1,0,0,0,1
+0,1,1,1,1,1,0,1,1
+0,1,1,1,1,1,0,0,1
+1,0,1,1,1,1,0,1,0
+0,1,1,1,1,0,0,1,1
+1,1,0,1,0,1,0,1,0
+1,0,0,1,1,1,0,1,0
+1,0,0,1,0,1,1,1,0
+0,1,1,1,1,0,0,0,1
+1,0,1,1,1,1,1,1,0
+0,1,1,0,1,1,0,1,1
diff --git a/Decision Trees/15EC10071_2.out b/Decision Trees/15EC10071_2.out
@@ -0,0 +1 @@
+0 0 1 1 
diff --git a/Decision Trees/15EC10071_2.py b/Decision Trees/15EC10071_2.py
@@ -0,0 +1,151 @@
+# Roll: 15EC10071
+# Name: Avinab Saha
+# Assignment number: 2
+# Specific compilation/execution flags: None
+
+
+import numpy as np
+
+def entropy(labels):
+    """Return the Shannon entropy, in bits, of the values in *labels*.
+
+    The relative frequency of every distinct value is computed with
+    ``np.unique`` and ``-p * log2(p)`` is accumulated over those frequencies.
+    An input without any value yields the integer 0.
+    """
+    _, occurrences = np.unique(labels, return_counts=True)
+    probabilities = occurrences.astype('float') / len(labels)
+    total = 0
+    for prob in probabilities:
+        if prob == 0.0:
+            # log2(0) is undefined; a zero frequency contributes nothing.
+            continue
+        total = total - prob * np.log2(prob)
+    return total
+
+def info_gain(attribute_data,labels):
+    """Return the information gain of splitting *labels* on *attribute_data*.
+
+    The gain is the entropy of *labels* minus the weighted entropy of the
+    label subsets selected by each distinct attribute value, where the
+    weight is the relative frequency of that value. *labels* is indexed
+    with a boolean mask, so it must support NumPy boolean indexing.
+    """
+    distinct_values, occurrences = np.unique(attribute_data, return_counts=True)
+    weights = occurrences.astype('float') / len(attribute_data)
+    gain = entropy(labels)
+    for weight, value in zip(weights, distinct_values):
+        subset = labels[attribute_data == value]
+        gain = gain - weight * entropy(subset)
+    return gain
+
+def choose_best_attribute(data, labels, attributes):
+    """Return the entry of *attributes* whose data column has the highest gain.
+
+    Column ``k`` of *data* is scored with ``info_gain`` for the k-th entry
+    of *attributes*. Ties keep the earliest entry. ``None`` is returned when
+    *attributes* is empty.
+    """
+    winner = None
+    top_gain = -999999
+    for column, candidate in enumerate(attributes):
+        candidate_gain = info_gain(data[:, column], labels)
+        if candidate_gain > top_gain:
+            top_gain, winner = candidate_gain, candidate
+    return winner
+
+def choose_best_attribute_column(attributes,attribute):
+    """Return the position of the first entry of *attributes* equal to *attribute*.
+
+    Returns ``None`` when no entry matches.
+    """
+    matches = (
+        position
+        for position, candidate in enumerate(attributes)
+        if attribute == candidate
+    )
+    return next(matches, None)
+
+def find_child_attribures(attributes,attribute):
+    """Return a new list with every entry of *attributes* that differs from *attribute*.
+
+    The relative order of the remaining entries is preserved.
+    """
+    return [candidate for candidate in attributes if candidate != attribute]
+
+def get_label(length,label):
+    """Return an array of *length* entries, each equal to *label*.
+
+    The array is produced by scaling an array of ones by *label*.
+    """
+    template = np.ones(length)
+    return template * label
+
+
+class DecisionTree:
+    """One node of a decision tree built by greedy information-gain splitting.
+
+    The root node represents the whole tree. A node is a leaf when
+    ``children`` is ``None``; it then predicts ``label``. Otherwise it
+    splits on ``attribute`` and owns one child per distinct value of that
+    attribute in its training subset.
+
+    Attributes:
+        level: depth of the node; the root built with ``old_level=0`` is 1.
+        max_level: maximum number of splits allowed on any root-to-leaf path.
+        attribute_value: value of the parent's split attribute on this branch.
+        parent: the parent node, or ``None`` for the root.
+        children: list of child nodes, or ``None`` for a leaf.
+        attribute: the attribute this node splits on, ``None`` for a leaf.
+        label: most frequent training label at this node.
+    """
+
+    def __init__(self, data, labels, attributes, max_level, old_level,value,parent,children):
+        """Build the (sub)tree for one training subset.
+
+        Args:
+            data: 2-D array, one row per example and one column per entry
+                of *attributes* (in the same order).
+            labels: 1-D array with the label of each row of *data*.
+            attributes: identifiers of the columns of *data*. ``classify``
+                uses them as column indices into the data it is given.
+            max_level: maximum number of splits on a path; ``None`` means
+                no limit.
+            old_level: level of the parent node (0 for the root).
+            value: attribute value that leads from the parent to this node.
+            parent: the parent node, or ``None``.
+            children: initial value of ``children``; pass ``None``.
+
+        Raises:
+            ValueError: if *labels* is empty.
+        """
+        self.level = old_level + 1
+        self.max_level = max_level
+        self.attribute_value = value
+        self.parent = parent
+        self.children = children
+        self.attribute = None
+
+        values, counts = np.unique(labels, return_counts=True)
+        if len(values) == 0:
+            raise ValueError("cannot build a decision tree node without training labels")
+        # Majority label; np.argmax returns the first maximum and np.unique
+        # sorts its output, so ties resolve to the smallest label.
+        self.label = values[np.argmax(counts)]
+
+        if len(values) == 1:
+            # Pure node: nothing left to separate.
+            return
+
+        # A node at level L sits below L - 1 splits, so splitting is allowed
+        # only while level <= max_level.
+        depth_exhausted = max_level is not None and self.level > max_level
+        if len(attributes) == 0 or depth_exhausted:
+            # Contradictory examples or depth limit reached: stay a leaf
+            # that predicts the majority label.
+            self.children = None
+            return
+
+        self.build(data, labels, attributes)
+
+    def build(self, data, labels, attributes):
+        """Split this node on the best attribute and build one child per value."""
+        self.attribute = choose_best_attribute(data, labels, attributes)
+        best_attribute_column = choose_best_attribute_column(attributes, self.attribute)
+        attribute_data = data[:, best_attribute_column]
+        child_attributes = find_child_attribures(attributes, self.attribute)
+        self.children = []
+        for val in np.unique(attribute_data):
+            selected = attribute_data == val
+            # The used column is removed so that the child's data columns
+            # stay aligned with child_attributes.
+            child_data = np.delete(data[selected, :], best_attribute_column, 1)
+            child_labels = labels[selected]
+            self.children.append(
+                DecisionTree(child_data, child_labels, child_attributes,
+                             self.max_level, self.level, val, self, None)
+            )
+
+    def classify(self, data):
+        """Return the predicted label of every row of *data*.
+
+        *data* must contain all original columns, because ``attribute`` is
+        used directly as a column index. A 1-D input is treated as a single
+        row. Rows whose attribute value matches no child keep the initial
+        prediction of 0.
+        """
+        if len(data.shape) == 1:
+            data = np.reshape(data, (1, len(data)))
+        if self.children is None:
+            return get_label(len(data), self.label)
+        labels = np.zeros(len(data))
+        for child in self.children:
+            child_attr_index = data[:, self.attribute] == child.attribute_value
+            labels[child_attr_index] = child.classify(data[child_attr_index])
+        return labels
+
+# Load the training examples in a single pass: the first 8 columns are the
+# attributes and the 9th column is the label. np.loadtxt ignores blank lines,
+# and ndmin=2 keeps a one-row file two-dimensional.
+Training_Set = np.loadtxt('data2.csv', delimiter=',', ndmin=2)
+Training_Data = Training_Set[:, :8]
+Training_Label = Training_Set[:, 8]
+
+# Load the test examples (8 attribute columns, no label column).
+Test_Data = np.loadtxt('test2.csv', delimiter=',', ndmin=2)[:, :8]
+
+# Attributes are identified by their column index in the data.
+attributes = list(range(Training_Data.shape[1]))
+tree = DecisionTree(Training_Data, Training_Label, attributes, 8, 0,None, None, None)
+y = tree.classify(Test_Data)
+
+# Write the predictions on one line, each followed by a single space.
+# The with-block closes the file, so no explicit close() is needed.
+with open('15EC10071_2.out','w') as out_file:
+    out_file.write("".join(str(int(x)) + " " for x in y))
diff --git a/Decision Trees/assignment2.pdf b/Decision Trees/assignment2.pdf
diff --git a/Decision Trees/data2.csv b/Decision Trees/data2.csv
@@ -0,0 +1,24 @@
+1,1,1,1,1,1,0,1,1
+1,1,1,1,1,1,0,0,1
+0,1,1,1,1,1,1,1,0
+1,1,1,1,1,0,0,1,1
+1,1,1,1,1,0,0,0,1
+1,1,1,0,1,1,0,1,1
+0,1,0,1,1,1,0,1,0
+1,1,1,0,1,1,0,0,1
+1,1,1,0,1,0,0,1,1
+1,1,1,0,1,0,0,0,1
+0,1,1,1,1,1,0,1,1
+0,1,1,1,1,1,0,0,1
+0,0,1,1,1,1,0,1,0
+0,1,1,1,1,0,0,1,1
+0,1,0,1,0,1,0,1,0
+0,0,0,1,1,1,0,1,0
+0,0,0,1,0,1,1,1,0
+0,1,1,1,1,0,0,0,1
+0,0,1,1,1,1,1,1,0
+0,1,1,0,1,1,0,1,1
+0,0,1,1,0,1,1,1,0
+0,0,0,1,0,1,1,1,0
+1,1,1,0,1,0,1,1,1
+1,1,0,0,1,0,1,1,1
diff --git a/Decision Trees/test2.csv b/Decision Trees/test2.csv
@@ -0,0 +1,4 @@
+0,1,1,1,1,1,1,1
+1,0,0,0,0,0,0,0
+0,1,1,0,1,0,0,0
+0,1,1,1,1,0,0,0
diff --git a/K Means Clustering/15EC10071_7.out b/K Means Clustering/15EC10071_7.out
@@ -0,0 +1 @@
+1 1 1 1 1 1 1 1 1 1 2 2 1 2 1 1 1 2 1 2 
diff --git a/K Means Clustering/15EC10071_7.py b/K Means Clustering/15EC10071_7.py
@@ -0,0 +1,86 @@
+# Name: Avinab Saha
+# Roll: 15EC10071
+# Assignment 7 ML CS60050
+# Tested with Python 2.7.12 (default, Nov 20 2017, 18:23:56) [GCC 5.4.0 20160609] on linux2
+
+import numpy as np
+
+def euclidean(P1,P2,w):
+    """Return the Euclidean distance between P1 and P2 over their first *w* coordinates."""
+    squared_total = 0
+    for axis in range(w):
+        delta = P1[axis] - P2[axis]
+        squared_total = squared_total + delta * delta
+    return np.sqrt(squared_total)
+
+# Load the data set in a single pass: one point per row, one attribute per
+# column. np.loadtxt ignores blank lines, and ndmin=2 keeps a one-row file
+# two-dimensional.
+Training_Data = np.loadtxt('data7.csv', delimiter=',', ndmin=2)
+h, w = Training_Data.shape
+line_count = h
+no_of_attributes = w
+Label = np.zeros(h, dtype=int)
+
+# Pick two distinct rows as the initial cluster centres. Every row, including
+# the first one, is eligible, and the choice adapts to the number of rows.
+# Call np.random.seed(<int>) before this line to make runs reproducible.
+one, two = np.random.choice(h, size=2, replace=False)
+C1 = Training_Data[one, :]
+C2 = Training_Data[two, :]
+
+for loop in range(10):
+    # Assignment step: label each point with its nearest centre; ties go to
+    # cluster 1.
+    for i in range(h):
+        d1 = euclidean(C1, Training_Data[i, :], w)
+        d2 = euclidean(C2, Training_Data[i, :], w)
+        Label[i] = 1 if d1 <= d2 else 2
+
+    # Update step: move each centre to the mean of its members. A cluster
+    # without members keeps its previous centre instead of dividing by zero.
+    in_cluster_1 = Label == 1
+    in_cluster_2 = Label == 2
+    no_clusters_in_1 = np.count_nonzero(in_cluster_1)
+    no_clusters_in_2 = np.count_nonzero(in_cluster_2)
+    if no_clusters_in_1 > 0:
+        C1 = Training_Data[in_cluster_1].sum(axis=0) / no_clusters_in_1
+    if no_clusters_in_2 > 0:
+        C2 = Training_Data[in_cluster_2].sum(axis=0) / no_clusters_in_2
+
+# Write the cluster labels on one line, each followed by a single space.
+# The with-block closes the file, so no explicit close() is needed.
+with open('15EC10071_7.out','w') as out_file:
+    out_file.write("".join(str(int(x)) + " " for x in Label))
diff --git a/K Means Clustering/assignment7.pdf b/K Means Clustering/assignment7.pdf